diff --git a/.gitignore b/.gitignore
index f6b6f99e..d7696fdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,8 @@ coverage/
*.key
kv.txt
kv_azure.txt
+
+# Local LLM models & venvs
+__LOCAL_LLMs/models/
+__LOCAL_LLMs/.venv-*/
+__LOCAL_LLMs/*.wav
diff --git a/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/components/MemoryDrilldown.tsx b/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/components/MemoryDrilldown.tsx
new file mode 100644
index 00000000..7682bfc2
--- /dev/null
+++ b/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/components/MemoryDrilldown.tsx
@@ -0,0 +1,267 @@
+'use client';
+
+import { useState, useEffect } from 'react';
+import { RefreshCw, Cpu, HardDrive, Archive, Layers, Zap } from 'lucide-react';
+import { formatBytes } from '../../../lib/format';
+import { ProgressBar } from '../../../components/ProgressBar';
+
+interface VmCategory { // Per-category memory totals in bytes, read from the `categories` field of the /api/system/memory response.
+ active: number; // counted as app memory by this component
+ wired: number; // counted as app memory by this component
+ compressor: number; // counted as app memory by this component
+ inactive: number;
+ purgeable: number;
+ speculative: number;
+ free: number;
+}
+
+interface GroupedProcess { // One row of the process list: every process sharing a name, aggregated by the memory API.
+ name: string; // process name shown in the row
+ rss: number; // summed resident set size, in bytes
+ pctMem: number; // summed ps %mem of the group
+ count: number; // how many processes were merged into this row
+ pids: number[]; // PIDs of the merged processes
+}
+
+interface MemoryDrilldownData { // The fields of the /api/system/memory response that this component reads.
+ totalRam: number; // denominator for every percentage in the panel
+ categories: VmCategory;
+ processes: GroupedProcess[]; // only the first 15 entries are rendered
+}
+
+const CATEGORY_META: Record< // Display metadata per memory category; key order here is the render order of the stacked bar and the legend.
+ keyof VmCategory,
+ { label: string; color: string; description: string } // color is a CSS custom-property reference
+> = {
+ active: {
+ label: 'Active',
+ color: 'var(--accent-primary)',
+ description: 'Pages recently used by apps',
+ },
+ wired: {
+ label: 'Wired',
+ color: 'var(--danger)',
+ description: 'Kernel & drivers — cannot be paged out',
+ },
+ compressor: {
+ label: 'Compressed',
+ color: 'var(--warning)',
+ description: 'Pages compressed to save RAM (still counts as used)',
+ },
+ inactive: {
+ label: 'Inactive',
+ color: 'var(--accent-secondary)',
+ description: 'Recently freed — reclaimable on demand',
+ },
+ purgeable: {
+ label: 'Purgeable',
+ color: 'var(--purple)',
+ description: 'Cache that macOS can discard immediately',
+ },
+ speculative: {
+ label: 'Speculative',
+ color: 'var(--text-tertiary)',
+ description: 'Pre-fetched pages — reclaimable',
+ },
+ free: {
+ label: 'Free',
+ color: 'var(--success)',
+ description: 'Unused pages — immediately available',
+ },
+};
+
+export function MemoryDrilldown() {
+ const [data, setData] = useState(null);
+ const [loading, setLoading] = useState(true);
+
+ const fetchData = async () => {
+ setLoading(true);
+ try {
+ const res = await fetch('/api/system/memory');
+ if (res.ok) setData(await res.json());
+ } catch {
+ // ignore
+ }
+ setLoading(false);
+ };
+
+ useEffect(() => {
+ fetchData();
+ }, []);
+
+ if (loading && !data) {
+ return (
+
+
+
+ );
+ }
+ if (!data) return null;
+
+ const total = data.totalRam;
+ const cats = data.categories;
+ const appMemory = cats.active + cats.wired + cats.compressor;
+
+ return (
+
+ {/* Category breakdown header */}
+
+
+ Memory Categories (vm_stat)
+
+
+
+
+ {/* Stacked bar */}
+
+ {(Object.keys(CATEGORY_META) as (keyof VmCategory)[]).map(key => {
+ const bytes = cats[key];
+ const pct = (bytes / total) * 100;
+ if (pct < 0.3) return null;
+ const meta = CATEGORY_META[key];
+ return (
+
+ {pct > 6 ? meta.label : ''}
+
+ );
+ })}
+
+
+ {/* Legend grid */}
+
+ {(Object.keys(CATEGORY_META) as (keyof VmCategory)[]).map(key => {
+ const bytes = cats[key];
+ const pct = (bytes / total) * 100;
+ const meta = CATEGORY_META[key];
+ const isApp = key === 'active' || key === 'wired' || key === 'compressor';
+ return (
+
+
+
+
+ {meta.label}
+
+
+
+ {formatBytes(bytes)}
+ ({pct.toFixed(1)}%)
+
+
+ );
+ })}
+
+
+ {/* Summary line */}
+
+
+ App memory (active + wired + compressed)
+
+
+ {formatBytes(appMemory)}
+
+
+
+ {/* Top processes */}
+
+
+ Top Processes by Memory
+
+
+
+ {data.processes.slice(0, 15).map((proc, i) => {
+ const pct = (proc.rss / total) * 100;
+ const isOllama = proc.name.toLowerCase().includes('ollama');
+ const isNode =
+ proc.name.toLowerCase().includes('node') || proc.name.toLowerCase().includes('next');
+ return (
+
+
+
+ {isOllama ? (
+
+ ) : isNode ? (
+
+ ) : (
+
+ )}
+
+ {proc.name}
+ {proc.count > 1 && (
+ ×{proc.count}
+ )}
+
+
+
+ {formatBytes(proc.rss)}
+ ({pct.toFixed(1)}%)
+
+
+
+
+ );
+ })}
+
+
+ );
+}
diff --git a/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/page.tsx b/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/page.tsx
index b9ae8913..a8eeae33 100644
--- a/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/page.tsx
+++ b/__LOCAL_LLMs/dashboard/src/app/(mission-control)/mission-control/page.tsx
@@ -36,6 +36,7 @@ import {
Star,
MessageSquare,
Settings,
+ Volume2,
} from 'lucide-react';
import type {
OllamaData,
@@ -57,6 +58,7 @@ import { ProgressBar } from '../../components/ProgressBar';
import { Sparkline } from '../../components/Sparkline';
import { RamBudgetBar } from './components/RamBudgetBar';
import { MarkdownResponse } from './components/MarkdownResponse';
+import { MemoryDrilldown } from './components/MemoryDrilldown';
export default function Dashboard() {
const [ollama, setOllama] = useState(null);
@@ -129,6 +131,19 @@ export default function Dashboard() {
>([]);
const [showInferenceLog, setShowInferenceLog] = useState(false);
const [inferenceSearch, setInferenceSearch] = useState('');
+ const [showMemoryDrilldown, setShowMemoryDrilldown] = useState(false);
+ const [ttsData, setTtsData] = useState<{
+ engines: Array<{
+ name: string;
+ type: 'ollama' | 'python';
+ status: 'ready' | 'partial' | 'missing';
+ model: string;
+ size?: string;
+ voices?: string[];
+ details: string;
+ }>;
+ venv: { exists: boolean; packages?: string[] };
+ } | null>(null);
const responseRef = useRef(null);
const abortRef = useRef(null);
const compareAbortRef = useRef(null);
@@ -158,6 +173,13 @@ export default function Dashboard() {
setMemoryHistory(prev => [...prev.slice(-29), sRes.value.memory.appMemory]);
}
}
+ // TTS engine status
+ try {
+ const tRes = await fetch('/api/tts');
+ if (tRes.ok) setTtsData(await tRes.json());
+ } catch {
+ /* ignore */
+ }
// F15: Check extraction service health via server-side proxy (avoids browser CORS/console errors)
try {
const eRes = await fetch('/api/extraction/health');
@@ -1143,21 +1165,33 @@ export default function Dashboard() {
-
+
setShowMemoryDrilldown(prev => !prev)}
+ style={{
+ outline: showMemoryDrilldown ? '2px solid var(--warning)' : 'none',
+ outlineOffset: '-1px',
+ }}
+ title="Click to see memory drilldown"
+ >
MEMORY
+
+ {showMemoryDrilldown ? '▲ hide' : '▼ drilldown'}
+
{formatBytes(system?.memory.appMemory || 0)}
- / {formatBytes(system?.memory.total || 0)}
+ used / {formatBytes(system?.memory.total || 0)}
-
- {formatBytes(system?.memory.cached || 0)} cached (reclaimable)
+
+ {formatBytes((system?.memory.free || 0) + (system?.memory.cached || 0) * 0.9)}{' '}
+ available for models
+ {/* Memory Drilldown Panel */}
+ {showMemoryDrilldown && (
+
+
+
+ Memory Drilldown
+
+
+
+ )}
+
{/* Main Grid */}
{/* Ollama Models — 2 cols */}
@@ -1351,7 +1396,7 @@ export default function Dashboard() {
totalRam={system.memory.total}
appMemory={system.memory.appMemory}
runningModels={ollama.running}
- freeRam={system.memory.free}
+ freeRam={system.memory.free + system.memory.cached}
/>
)}
{ollama.models
@@ -1456,20 +1501,36 @@ export default function Dashboard() {
)}
+ {/* Metrics row */}
- {formatBytes(model.size)}
+
+
+ {formatBytes(model.size)}
+
{model.details?.parameter_size && (
- {model.details.parameter_size}
+
+
+ {model.details.parameter_size}
+
)}
{model.details?.quantization_level && (
- {model.details.quantization_level}
+
+ {model.details.quantization_level}
+
)}
-
- ~{formatBytes(estRam)} RAM
-
{(() => {
const ctx = modelMetadata[model.name]?.contextLength;
return ctx ? (
@@ -1486,7 +1547,86 @@ export default function Dashboard() {
~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
)}
+ {(() => {
+ const ps = parseFloat(model.details?.parameter_size || '0');
+ const tier =
+ ps <= 3
+ ? { label: 'Tiny · Instant', color: 'var(--success)' }
+ : ps <= 8
+ ? { label: 'Small · Fast', color: 'var(--accent-secondary)' }
+ : ps <= 14
+ ? { label: 'Medium', color: 'var(--accent-primary)' }
+ : ps <= 34
+ ? { label: 'Large · Slow', color: 'var(--warning)' }
+ : { label: 'XL · Very Slow', color: 'var(--danger)' };
+ return (
+
+ {tier.label}
+
+ );
+ })()}
+ {/* Memory fit — only for non-running models */}
+ {!running &&
+ system &&
+ (() => {
+ const avail = system.memory.free + system.memory.cached * 0.9;
+ const gap = avail - estRam;
+ const fitColor =
+ fitStatus === 'fits'
+ ? 'var(--success)'
+ : fitStatus === 'tight'
+ ? 'var(--warning)'
+ : 'var(--danger)';
+ return (
+
+
+
+ Needs ~{formatBytes(estRam)} · {formatBytes(avail)}{' '}
+ available
+
+
+ {fitStatus === 'fits'
+ ? `✓ ${formatBytes(gap)} to spare`
+ : fitStatus === 'tight'
+ ? `⚠ Tight — ${formatBytes(gap)} to spare`
+ : `✗ ${formatBytes(Math.abs(gap))} short`}
+
+
+
+
+ );
+ })()}
{running &&
(() => {
const rm = ollama?.running.find(r => r.name === model.name);
@@ -1547,26 +1687,6 @@ export default function Dashboard() {
>
) : (
- {fitStatus && !running && (
-
- )}
-
- {formatBytes(system?.memory.free || 0)} avail
+
+ {formatBytes((system?.memory.free || 0) + (system?.memory.cached || 0) * 0.9)}{' '}
+ avail
- App: {formatBytes(system?.memory.appMemory || 0)}
- Cache: {formatBytes(system?.memory.cached || 0)}
+ Used: {formatBytes(system?.memory.appMemory || 0)}
+ Total: {formatBytes(system?.memory.total || 0)}
@@ -2024,6 +2145,116 @@ export default function Dashboard() {
)}
+ {/* Speech — TTS Engines */}
+
+
+
+ Speech (TTS)
+
+ {ttsData ? (
+
+ {ttsData.engines.map(engine => (
+
+
+
+
+ {engine.name}
+
+ {engine.type === 'ollama' ? 'Ollama' : 'Python'}
+
+
+ {engine.size && (
+
+ {engine.size}
+
+ )}
+
+
+ {engine.model}
+
+
+ {engine.details}
+
+ {engine.voices && engine.status === 'ready' && (
+
+ {engine.voices.map(v => (
+
+ {v}
+
+ ))}
+
+ )}
+
+ ))}
+ {/* Venv status */}
+
+ Python venv
+
+ {ttsData.venv.exists ? (
+ <>✓ {ttsData.venv.packages?.join(' · ') || 'installed'}>
+ ) : (
+ 'Not found — run setup-tts.sh'
+ )}
+
+
+
+ ) : (
+
+
+ Loading TTS status...
+
+
+ )}
+
+
{/* Extraction Service (F15) */}
diff --git a/__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
new file mode 100644
index 00000000..69e4409a
--- /dev/null
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
@@ -0,0 +1,136 @@
+import { NextResponse } from 'next/server';
+import { exec } from 'child_process';
+import { promisify } from 'util';
+import os from 'os';
+
+const execAsync = promisify(exec);
+
+interface ProcessInfo { // One parsed row of `ps` output.
+ pid: number;
+ name: string; // last path segment of the ps `comm` column
+ rss: number; // bytes (ps reports KB; converted when the row is parsed)
+ pctMem: number; // ps `%mem` column
+ user: string;
+}
+
+interface VmStatBreakdown { // Byte totals derived from `vm_stat` (page count × page size), plus the page size that was used.
+ active: number; // "Pages active"
+ wired: number; // "Pages wired down"
+ compressor: number; // "Pages occupied by compressor"
+ inactive: number; // "Pages inactive"
+ purgeable: number; // "Pages purgeable"
+ speculative: number; // "Pages speculative"
+ free: number; // "Pages free"
+ pageSize: number; // bytes per page from the vm_stat header; 16384 when the header cannot be parsed
+}
+
+/**
+ * Lists the processes with the largest resident set size, as reported by `ps`.
+ *
+ * @param limit Maximum number of rows to return. Because the value is placed
+ *   in a shell pipeline it is coerced to a positive integer first; anything
+ *   non-finite or below 1 falls back to 20.
+ * @returns Parsed rows ordered by RSS descending, or `[]` when the command
+ *   fails or exceeds the 3 s timeout.
+ */
+async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
+  const requested = Number(limit);
+  const rowLimit = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 20;
+  try {
+    // ps reports RSS in KB; `sort -k2 -rn` orders by that column, descending
+    const { stdout } = await execAsync(
+      `ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -n ${rowLimit}`,
+      { timeout: 3000 }
+    );
+    return (
+      stdout
+        .trim()
+        .split('\n')
+        .filter(Boolean)
+        .map(line => {
+          const parts = line.trim().split(/\s+/);
+          const pid = parseInt(parts[0], 10);
+          const rssKb = parseInt(parts[1], 10);
+          const pctMem = parseFloat(parts[2]);
+          const user = parts[3];
+          // comm can contain spaces — everything after the user column is the command
+          const rawName = parts.slice(4).join(' ');
+          // Keep only the executable name, not its full path
+          const name = rawName.split('/').pop() || rawName;
+          return {
+            pid,
+            name,
+            rss: rssKb * 1024,
+            pctMem,
+            user,
+          };
+        })
+        // Malformed rows parse to NaN, which fails both checks
+        .filter(p => Number.isInteger(p.pid) && p.rss > 0)
+    );
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Runs `vm_stat` and converts its per-category page counts into bytes.
+ *
+ * The page size is read from the "page size of N bytes" header; 16384 is used
+ * when that header cannot be parsed. A category whose line is absent from the
+ * output is reported as 0.
+ *
+ * @returns The byte breakdown, or an all-zero breakdown when `vm_stat` fails
+ *   or exceeds the 2 s timeout.
+ */
+async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
+  const fallbackPageSize = 16384;
+  try {
+    const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
+    const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
+    const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1], 10) : fallbackPageSize;
+    // Looks up "<label>:   <pages>" and scales the page count to bytes
+    const parse = (label: string): number => {
+      const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
+      return match ? parseInt(match[1], 10) * pageSize : 0;
+    };
+    return {
+      active: parse('Pages active'),
+      wired: parse('Pages wired down'),
+      compressor: parse('Pages occupied by compressor'),
+      inactive: parse('Pages inactive'),
+      purgeable: parse('Pages purgeable'),
+      speculative: parse('Pages speculative'),
+      free: parse('Pages free'),
+      pageSize,
+    };
+  } catch {
+    return {
+      active: 0,
+      wired: 0,
+      compressor: 0,
+      inactive: 0,
+      purgeable: 0,
+      speculative: 0,
+      free: 0,
+      pageSize: fallbackPageSize,
+    };
+  }
+}
+
+/**
+ * GET /api/system/memory — memory drilldown payload.
+ *
+ * Responds with total RAM, the raw `vm_stat` breakdown, the same breakdown
+ * without `pageSize` under `categories`, and the top processes merged by name.
+ *
+ * NOTE(review): grouping runs over the 25 largest individual processes only,
+ * so a group's totals exclude any same-named processes below that cut-off.
+ */
+export async function GET() {
+  const [processes, vmstat] = await Promise.all([getTopProcesses(25), getVmStatBreakdown()]);
+
+  // Group by process name and sum RSS (e.g. multiple Chrome helpers).
+  // A Map is used because names are arbitrary strings: a process called
+  // "constructor" or "toString" would collide with Object.prototype on a plain object.
+  const grouped = new Map<
+    string,
+    { rss: number; pctMem: number; count: number; pids: number[] }
+  >();
+  for (const p of processes) {
+    let entry = grouped.get(p.name);
+    if (!entry) {
+      entry = { rss: 0, pctMem: 0, count: 0, pids: [] };
+      grouped.set(p.name, entry);
+    }
+    entry.rss += p.rss;
+    entry.pctMem += p.pctMem;
+    entry.count += 1;
+    entry.pids.push(p.pid);
+  }
+
+  const groupedProcesses = Array.from(grouped, ([name, info]) => ({
+    name,
+    rss: info.rss,
+    // one decimal place
+    pctMem: Math.round(info.pctMem * 10) / 10,
+    count: info.count,
+    pids: info.pids,
+  })).sort((a, b) => b.rss - a.rss);
+
+  return NextResponse.json({
+    totalRam: os.totalmem(),
+    vmstat,
+    categories: {
+      active: vmstat.active,
+      wired: vmstat.wired,
+      compressor: vmstat.compressor,
+      inactive: vmstat.inactive,
+      purgeable: vmstat.purgeable,
+      speculative: vmstat.speculative,
+      free: vmstat.free,
+    },
+    processes: groupedProcesses,
+  });
+}
diff --git a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
index b58f1170..78ea6cbd 100644
--- a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
@@ -133,12 +133,13 @@ async function getAccurateMemory(): Promise<{
const appMemory = active + wired + compressor;
const cached = inactive + purgeable + speculative;
- const trueFree = free + cached; // macOS reclaims cached on demand
+ // Return raw free separately from cached — no overlap
+ // available for loading = free + cached (macOS reclaims cached on demand)
const ratio = appMemory / totalMem;
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
- return { total: totalMem, appMemory, cached, free: trueFree, pressure };
+ return { total: totalMem, appMemory, cached, free, pressure };
} catch {
// Fallback to Node.js (inaccurate on macOS but works everywhere)
const freeMem = os.freemem();
diff --git a/__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
new file mode 100644
index 00000000..15e9ba59
--- /dev/null
+++ b/__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
@@ -0,0 +1,175 @@
+import { NextResponse } from 'next/server';
+import { exec, execFile } from 'child_process';
+import { promisify } from 'util';
+import { access, stat, readdir } from 'fs/promises';
+import { join, resolve } from 'path';
+
+const execAsync = promisify(exec);
+
+// process.cwd() = dashboard/, parent = __LOCAL_LLMs/ — NOTE(review): assumes the server is launched from dashboard/; confirm for other launch modes
+const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');
+
+interface TtsEngine { // Status record for one TTS engine, returned in the `engines` array of GET /api/tts.
+ name: string; // display name
+ type: 'ollama' | 'python';
+ status: 'ready' | 'partial' | 'missing'; // 'ready' = every required piece found; the partial/missing rules differ per engine check
+ model: string; // model identifier
+ size?: string; // human-readable size; only set when status is 'ready'
+ voices?: string[]; // voice names; only the Orpheus check fills this in
+ details: string; // summary, list of missing pieces, or setup hint
+}
+
+/**
+ * Tests whether a filesystem path is accessible to this process.
+ *
+ * @param path Path to probe.
+ * @returns `true` when `access` succeeds, `false` on any error.
+ */
+async function fileExists(path: string): Promise<boolean> {
+  try {
+    await access(path);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Reads the size of a file.
+ *
+ * @param path Path to stat.
+ * @returns The size in bytes, or 0 when `stat` fails for any reason.
+ */
+async function getFileSize(path: string): Promise<number> {
+  try {
+    const s = await stat(path);
+    return s.size;
+  } catch {
+    return 0;
+  }
+}
+
+/**
+ * Reports whether the Orpheus TTS pipeline is usable on this machine.
+ *
+ * Three pieces are probed: an Ollama model whose name contains "orpheus",
+ * the SNAC decoder weights at `models/snac_24khz/pytorch_model.bin`, and the
+ * Python interpreter inside `.venv-qwen-tts`.
+ *
+ * @returns An engine record whose status is `ready` when all three exist,
+ *   `partial` when the Ollama model exists but the decoder and/or venv is
+ *   missing, and `missing` otherwise.
+ */
+async function checkOrpheus(): Promise<TtsEngine> {
+  const engine: TtsEngine = {
+    name: 'Orpheus TTS',
+    type: 'ollama',
+    status: 'missing',
+    model: 'sematre/orpheus:en',
+    voices: ['tara', 'leah', 'jess', 'leo', 'dan', 'mia', 'zac', 'zoe'],
+    details: '',
+  };
+
+  // Ask the local Ollama server which models are installed
+  let hasModel = false;
+  try {
+    const res = await fetch('http://localhost:11434/api/tags', {
+      signal: AbortSignal.timeout(2000),
+    });
+    if (res.ok) {
+      const data = await res.json();
+      hasModel = data.models?.some((m: { name: string }) => m.name.includes('orpheus')) ?? false;
+    }
+  } catch {
+    // Ollama unreachable, timed out, or returned an unexpected body —
+    // treated the same as "model not installed"
+  }
+
+  // SNAC decoder weights
+  const snacPath = join(LOCAL_LLMS_DIR, 'models', 'snac_24khz', 'pytorch_model.bin');
+  const hasSnac = await fileExists(snacPath);
+  const snacSize = hasSnac ? await getFileSize(snacPath) : 0;
+
+  // Python venv interpreter
+  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
+  const hasVenv = await fileExists(venvPython);
+
+  if (hasModel && hasSnac && hasVenv) {
+    engine.status = 'ready';
+    engine.size = `${(snacSize / 1e6).toFixed(0)} MB decoder`;
+    engine.details = 'Ollama model + SNAC decoder + Python venv';
+  } else if (hasModel) {
+    engine.status = 'partial';
+    const missing: string[] = [];
+    if (!hasSnac) missing.push('SNAC decoder');
+    if (!hasVenv) missing.push('Python venv');
+    engine.details = `Missing: ${missing.join(', ')}`;
+  } else {
+    // status is already 'missing' from the initial record
+    engine.details = 'Run: bash setup-tts.sh';
+  }
+
+  return engine;
+}
+
+/**
+ * Reports whether Qwen3-TTS can run on this machine.
+ *
+ * Three pieces are probed: a `.safetensors` file in the model directory, the
+ * tokenizer's `config.json`, and the Python interpreter inside `.venv-qwen-tts`.
+ *
+ * @returns An engine record whose status is `ready` when all three exist,
+ *   `partial` when the weights or the tokenizer exist but something is
+ *   missing, and `missing` when neither is present.
+ */
+async function checkQwenTts(): Promise<TtsEngine> {
+  const engine: TtsEngine = {
+    name: 'Qwen3-TTS',
+    type: 'python',
+    status: 'missing',
+    model: 'Qwen3-TTS-12Hz-0.6B-CustomVoice',
+    details: '',
+  };
+
+  const modelDir = join(LOCAL_LLMS_DIR, 'models', 'Qwen3-TTS-12Hz-0.6B-CustomVoice');
+  const tokenizerDir = join(LOCAL_LLMS_DIR, 'models', 'Qwen3-TTS-Tokenizer-12Hz');
+
+  let hasModel = false;
+  let modelSize = 0;
+  try {
+    const files = await readdir(modelDir);
+    // The first .safetensors file found stands in for the model weights
+    const safetensors = files.find(f => f.endsWith('.safetensors'));
+    if (safetensors) {
+      hasModel = true;
+      modelSize = await getFileSize(join(modelDir, safetensors));
+    }
+  } catch {
+    // model directory does not exist or is unreadable
+  }
+
+  const hasTokenizer = await fileExists(join(tokenizerDir, 'config.json'));
+  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
+  const hasVenv = await fileExists(venvPython);
+
+  if (hasModel && hasTokenizer && hasVenv) {
+    engine.status = 'ready';
+    engine.size = `${(modelSize / 1e9).toFixed(1)} GB`;
+    engine.details = '0.6B params · 10 languages · MPS/CPU';
+  } else if (hasModel || hasTokenizer) {
+    engine.status = 'partial';
+    const missing: string[] = [];
+    if (!hasModel) missing.push('model weights');
+    if (!hasTokenizer) missing.push('tokenizer');
+    if (!hasVenv) missing.push('Python venv');
+    engine.details = `Missing: ${missing.join(', ')}`;
+  } else {
+    // status is already 'missing' from the initial record
+    engine.details = 'Run: bash setup-tts.sh';
+  }
+
+  return engine;
+}
+
+/**
+ * Checks the TTS virtualenv and, when present, asks its interpreter for the
+ * installed `snac` and `torch` versions.
+ *
+ * @returns `{ exists: false }` when the interpreter is absent. Otherwise
+ *   `exists: true` with the interpreter path, plus `packages`
+ *   (`["snac=<version>", "torch=<version>"]`) when both imports succeed
+ *   within the 5 s timeout.
+ */
+async function checkVenv(): Promise<{
+  exists: boolean;
+  python?: string;
+  packages?: string[];
+}> {
+  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
+  const exists = await fileExists(venvPython);
+  if (!exists) return { exists: false };
+
+  try {
+    // execFile passes the interpreter path and script as argv entries, so no
+    // shell parses them: quotes, `$` or backticks in the install path cannot
+    // break or alter the command.
+    const { stdout } = await promisify(execFile)(
+      venvPython,
+      [
+        '-c',
+        "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')",
+      ],
+      { timeout: 5000 }
+    );
+    return {
+      exists: true,
+      python: venvPython,
+      packages: stdout.trim().split(' '),
+    };
+  } catch {
+    // Interpreter exists, but the imports failed or the probe timed out
+    return { exists: true, python: venvPython };
+  }
+}
+
+/**
+ * GET /api/tts — status of both TTS engines and the Python venv, together
+ * with the setup and test commands.
+ */
+export async function GET() {
+  // Start all three probes before awaiting so they run concurrently
+  const orpheusProbe = checkOrpheus();
+  const qwenProbe = checkQwenTts();
+  const venvProbe = checkVenv();
+  const [orpheus, qwenTts, venv] = await Promise.all([orpheusProbe, qwenProbe, venvProbe]);
+
+  const testCommands = {
+    orpheus: '.venv-qwen-tts/bin/python test_orpheus_tts.py',
+    qwenTts: '.venv-qwen-tts/bin/python test_qwen_tts.py',
+  };
+  const payload = {
+    engines: [orpheus, qwenTts],
+    venv,
+    setupScript: 'bash setup-tts.sh',
+    testCommands,
+  };
+
+  return NextResponse.json(payload);
+}
diff --git a/__LOCAL_LLMs/dashboard/src/app/lib/format.ts b/__LOCAL_LLMs/dashboard/src/app/lib/format.ts
index 89bdc8dc..10f9ef91 100644
--- a/__LOCAL_LLMs/dashboard/src/app/lib/format.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/lib/format.ts
@@ -19,13 +19,15 @@ export function estimateRam(diskSize: number, quant?: string): number {
}
// N2: Check if model fits in available memory
+// free = raw free pages, cached = inactive+purgeable+speculative (no overlap)
+// macOS reclaims ~90% of cached on demand for large allocations (model mmaps)
export type FitStatus = 'fits' | 'tight' | 'no';
export function checkMemoryFit(
estimatedRam: number,
freeMemory: number,
cachedMemory: number
): FitStatus {
- const available = freeMemory + cachedMemory * 0.5;
+ const available = freeMemory + cachedMemory * 0.9;
const ratio = estimatedRam / available;
if (ratio < 0.7) return 'fits';
if (ratio <= 1.0) return 'tight';
diff --git a/__LOCAL_LLMs/docs/00-developer-guide.md b/__LOCAL_LLMs/docs/00-developer-guide.md
index c861fe7e..b2243983 100644
--- a/__LOCAL_LLMs/docs/00-developer-guide.md
+++ b/__LOCAL_LLMs/docs/00-developer-guide.md
@@ -10,10 +10,13 @@ This machine runs a local LLM server via [Ollama](https://ollama.com), exposing
**Models installed:**
-| Model | Size | Best For |
-| ------------------- | ------- | ----------------------------------------- |
-| `qwen2.5-coder:32b` | 18.5 GB | Code (TS, Python, Swift), structured JSON |
-| `llama3.1:8b` | 4.7 GB | Fast evals, general tasks |
+| Model | Size | Best For |
+| -------------------- | ------ | -------------------------------------------- |
+| `qwen2.5-coder:32b` | 19 GB | Code (TS, Python, Swift), structured JSON |
+| `qwen2.5-coder:7b` | 4.7 GB | Fast code tasks, fits alongside other models |
+| `deepseek-r1:32b` | 19 GB | Complex reasoning, chain-of-thought |
+| `llama3.1:8b` | 4.9 GB | Fast evals, general tasks |
+| `sematre/orpheus:en` | 4 GB | Text-to-speech (8 voices, emotion tags) |
---
diff --git a/__LOCAL_LLMs/docs/05-mission-control-dashboard.md b/__LOCAL_LLMs/docs/05-mission-control-dashboard.md
index f8f70ae4..95e85fc6 100644
--- a/__LOCAL_LLMs/docs/05-mission-control-dashboard.md
+++ b/__LOCAL_LLMs/docs/05-mission-control-dashboard.md
@@ -1,17 +1,103 @@
# 05 — Mission Control Dashboard
-> **Documentation has moved.** All dashboard docs now live in the dashboard directory.
-
-- **PRD:** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_PRD.md`](../dashboard/docs/DASHBOARD_PRD.md)
-- **Review (39 items):** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_REVIEW.md`](../dashboard/docs/DASHBOARD_REVIEW.md)
-- **Roadmap (N1–N15):** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_ROADMAP.md`](../dashboard/docs/DASHBOARD_ROADMAP.md)
+> Next.js 16 dashboard for managing local LLM models, system resources, and inference.
+> Last updated: 2026-02-21
## Quick Start
```bash
cd __LOCAL_LLMs/dashboard
npm install # first time only
-npm run dev -- -p 3100
+npm run dev # runs on port 3000
```
-Open: **http://localhost:3100**
+Open: **http://localhost:3000**
+
+---
+
+## Recent Changes (Feb 2026)
+
+### Memory Calculation Fix
+
+**Root cause:** The system API (`/api/system`) computed `trueFree = free + cached` and returned it as `free`. This made `free` and `cached` overlap. The UI then did `available = free + cached * 0.5`, which **double-counted** cached memory and inflated available RAM by ~8 GB.
+
+**Fix (4 files):**
+
+- `src/app/api/system/route.ts` — Return raw `Pages free` separately from `cached` (no overlap)
+- `src/app/lib/format.ts` — Updated `checkMemoryFit()` to use `cached × 0.9` (macOS reclaims ~90% on demand)
+- `src/app/(mission-control)/mission-control/page.tsx` — All UI memory references fixed
+- `src/app/(mission-control)/mission-control/components/RamBudgetBar.tsx` — Receives corrected `free + cached` through its `freeRam` prop, passed from `page.tsx` (cached is not weighted by 0.9 here)
+
+**Memory formula:** `available for models = rawFree + cached × 0.9`
+
+### Memory Drilldown
+
+Click the **MEMORY** card in the status bar to toggle a drilldown panel showing:
+
+1. **Stacked bar** — vm_stat categories (Active, Wired, Compressed, Inactive, Purgeable, Speculative, Free); segments under 0.3% of total RAM are omitted
+2. **Legend grid** — exact bytes + percentage for each category
+3. **App memory summary** — Active + Wired + Compressed = total used
+4. **Top 15 process groups by RSS** — the 25 largest processes grouped by name, Ollama highlighted in green
+
+**New files:**
+
+- `src/app/api/system/memory/route.ts` — Process memory API (`ps` + `vm_stat`)
+- `src/app/(mission-control)/mission-control/components/MemoryDrilldown.tsx` — Drilldown UI
+
+### Simplified Memory UI
+
+All memory displays now use consistent, plain language:
+
+| Element | Before (confusing) | After (clear) |
+| -------------------- | ---------------------------------- | ------------------------------------------- |
+| **MEMORY card** | "10.5 GB / 48 GB" (ambiguous) | **"35.6 GB used / 48 GB"** |
+| **Subtitle** | "App: 35.6 GB · Cache: 11.6 GB" | **"10.5 GB available for models"** (green) |
+| **Model fit** | "76 MB free + 10.5 GB reclaimable" | **"Needs ~22 GB · 10.5 GB available"** |
+| **Fit badge** | "✗ Won't fit" | **"✗ 11.6 GB short"** (with exact gap) |
+| **System panel RAM** | "76 MB avail" | **"10.5 GB avail"** (green, matches header) |
+
+---
+
+## Detailed Documentation
+
+- **PRD:** [`dashboard/docs/DASHBOARD_PRD.md`](../dashboard/docs/DASHBOARD_PRD.md)
+- **Review (39 items):** [`dashboard/docs/DASHBOARD_REVIEW.md`](../dashboard/docs/DASHBOARD_REVIEW.md)
+- **Roadmap (N1–N15):** [`dashboard/docs/DASHBOARD_ROADMAP.md`](../dashboard/docs/DASHBOARD_ROADMAP.md)
+- **Rich Features Roadmap (A–G):** [`dashboard/docs/RICH_FEATURES_ROADMAP.md`](../dashboard/docs/RICH_FEATURES_ROADMAP.md)
+
+---
+
+## API Routes
+
+| Route | Method | Description |
+| -------------------- | -------- | ---------------------------------------------------- |
+| `/api/ollama` | GET/POST | Ollama proxy (list, load, unload, generate) |
+| `/api/whisper` | GET | Whisper binary/model discovery |
+| `/api/system` | GET | System info (chip, RAM, disk, brew, pressure) |
+| `/api/system/memory` | GET | Memory drilldown (vm_stat breakdown + top processes) |
+| `/api/system/exec`   | POST     | Safe shell command execution                         |
+| `/api/tts`           | GET      | TTS engine status (Orpheus, Qwen3-TTS, Python venv)  |
+
+---
+
+## Key Components
+
+```
+dashboard/src/app/
+├── (mission-control)/mission-control/
+│ ├── page.tsx # Main Mission Control page
+│ └── components/
+│ ├── RamBudgetBar.tsx # Stacked RAM budget visualization
+│ ├── MemoryDrilldown.tsx # Process-level memory breakdown
+│ └── MarkdownResponse.tsx # Markdown renderer for LLM output
+├── (workspace)/components/ # Chat workspace (conversations, messages)
+├── api/
+│ ├── ollama/route.ts
+│ ├── whisper/route.ts
+│ ├── system/route.ts
+│ └── system/memory/route.ts
+└── lib/
+ ├── format.ts # formatBytes, estimateRam, checkMemoryFit
+ ├── db.ts # IndexedDB CRUD (conversations, projects, tasks)
+ ├── cron.ts # Cron expression parser
+ └── scheduled-tasks.ts # Built-in task templates
+```
diff --git a/__LOCAL_LLMs/docs/08-troubleshooting.md b/__LOCAL_LLMs/docs/08-troubleshooting.md
index b70bdade..596f7dd8 100644
--- a/__LOCAL_LLMs/docs/08-troubleshooting.md
+++ b/__LOCAL_LLMs/docs/08-troubleshooting.md
@@ -19,19 +19,41 @@ This machine is behind an AT&T Forcepoint proxy that performs SSL deep packet in
### What Works Through Proxy
-| Tool | Status | Notes |
-| -------------------------- | ---------- | ------------------------------------- |
-| `ollama pull` | ✅ Works | Ollama handles proxy natively |
-| `brew install` | ✅ Works | Homebrew handles proxy |
-| `npm install` | ✅ Works | With `NODE_TLS_REJECT_UNAUTHORIZED=0` |
-| `curl` to Hugging Face | ❌ Blocked | Returns 19 KB HTML redirect page |
-| `curl -k` to Hugging Face | ❌ Blocked | Still intercepted even with `-k` |
-| `python requests` to HF | ❌ Blocked | SSL_CERTIFICATE_VERIFY_FAILED |
-| `huggingface_hub` download | ❌ Blocked | Falls back to cached (broken) files |
+| Tool | Status | Notes |
+| -------------------------- | ---------- | ------------------------------------------- |
+| `ollama pull` | ✅ Works | Ollama handles proxy natively |
+| `brew install` | ✅ Works | Homebrew handles proxy |
+| `npm install` | ✅ Works | With `NODE_TLS_REJECT_UNAUTHORIZED=0` |
+| `git clone` (GitHub) | ✅ Works | With `GIT_SSL_NO_VERIFY=1` |
+| `pip install` (PyPI) | ✅ Works | Via corporate Artifactory mirror |
+| **`hf-mirror.com`** | ✅ Works | Chinese HuggingFace mirror, **not blocked** |
+| `curl` to Hugging Face | ❌ Blocked | Returns 19 KB HTML redirect page |
+| `curl -k` to Hugging Face | ❌ Blocked | Still intercepted even with `-k` |
+| `python requests` to HF | ❌ Blocked | SSL_CERTIFICATE_VERIFY_FAILED |
+| `huggingface_hub` download | ❌ Blocked | Falls back to cached (broken) files |
-### Workaround: Download Off-Network
+### Workaround 1: Use hf-mirror.com (recommended)
-For Hugging Face model downloads (e.g., Whisper GGML files):
+`hf-mirror.com` is a Chinese mirror of HuggingFace that **is NOT blocked** by Forcepoint. Replace `huggingface.co` with `hf-mirror.com` in any download URL:
+
+```bash
+# Instead of: https://huggingface.co/org/model/resolve/main/file.bin
+# Use: https://hf-mirror.com/org/model/resolve/main/file.bin
+
+# Example: download SNAC decoder (TTS)
+curl -k -L -o models/snac_24khz/pytorch_model.bin \
+ "https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
+
+# Example: download Whisper model
+curl -k -L -o ~/whisper-models/ggml-large-v3-turbo.bin \
+ "https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
+```
+
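+The TTS scripts (`setup-tts.sh`, `download-tts-models.sh`) use this mirror automatically.
+
+> **Caution:** `curl -k` disables TLS certificate verification and the mirror is a third party. Compare each downloaded file's SHA-256 with the hash shown on the official HuggingFace model page before loading it.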
+
+### Workaround 2: Download Off-Network
+
+If the mirror is also blocked, use a non-corporate network:
1. **Disconnect** from corporate VPN/Wi-Fi
2. **Connect** to personal hotspot or home Wi-Fi
diff --git a/__LOCAL_LLMs/docs/10-text-to-speech.md b/__LOCAL_LLMs/docs/10-text-to-speech.md
new file mode 100644
index 00000000..6df2669f
--- /dev/null
+++ b/__LOCAL_LLMs/docs/10-text-to-speech.md
@@ -0,0 +1,230 @@
+# 10 — Text-to-Speech (TTS) — Local Setup
+
+> Local TTS on Apple Silicon: Orpheus TTS via Ollama + Qwen3-TTS 0.6B direct.
+> Works through corporate proxy via `hf-mirror.com`.
+> Last updated: 2026-02-21
+
+---
+
+## Overview
+
+Two TTS engines for local speech generation — both run fully offline after initial setup.
+
+| Engine | Model | Size | How It Runs | Quality | Speed |
+| --------------- | --------------------------------- | ------ | ----------------------- | ------------------------------------------ | ------------------------ |
+| **Orpheus TTS** | `sematre/orpheus:en` | 4 GB | Via Ollama (Metal GPU) | Great — expressive, 8 voices, emotion tags | ~11s for short sentences |
+| **Qwen3-TTS** | `Qwen3-TTS-12Hz-0.6B-CustomVoice` | 1.2 GB | Direct Python (MPS/CPU) | Excellent — 10 languages, voice design | ~10-20s on MPS |
+
+### Architecture
+
+```
+Text → Ollama (Orpheus 3B) → Audio Tokens → SNAC Decoder → WAV file
+Text → Qwen3-TTS 0.6B (PyTorch MPS) → WAV file
+```
+
+---
+
+## Quick Start (Fresh Laptop)
+
+The **one-shot setup script** handles everything — works on any Apple Silicon Mac, including through corporate proxy:
+
+```bash
+cd __LOCAL_LLMs
+bash setup-tts.sh
+```
+
+This installs: Python 3.12, venv, pip packages, Orpheus model (Ollama), SNAC decoder (hf-mirror.com), and optionally Qwen3-TTS 0.6B.
+
+After setup:
+
+```bash
+.venv-qwen-tts/bin/python test_orpheus_tts.py
+afplay test_orpheus_tara.wav
+```
+
+---
+
+## Prerequisites
+
+| Component | How to Install | Notes |
+| ------------------------- | ---------------------------------- | ------------------------------ |
+| **macOS + Apple Silicon** | — | M1/M2/M3/M4 (MPS acceleration) |
+| **Homebrew** | `/bin/bash -c "$(curl -fsSL ...)"` | Package manager |
+| **Ollama** | `brew install ollama` | Local LLM server |
+| **Python 3.12** | `brew install python@3.12` | TTS packages need 3.12 |
+
+All of the above are installed automatically by `setup-tts.sh`.
+
+---
+
+## Manual Setup (step by step)
+
+If you prefer to run each step yourself instead of `setup-tts.sh`:
+
+### 1. Python Environment
+
+```bash
+cd __LOCAL_LLMs
+
+# Install Python 3.12
+brew install python@3.12
+
+# Create isolated venv
+/opt/homebrew/bin/python3.12 -m venv .venv-qwen-tts
+
+# Install packages
+.venv-qwen-tts/bin/pip install -U snac qwen-tts
+```
+
+### 2. Orpheus TTS Model (via Ollama)
+
+```bash
+ollama serve & # start Ollama if not running
+ollama pull sematre/orpheus:en # 4 GB, via Ollama registry (works through proxy)
+```
+
+### 3. SNAC Audio Decoder
+
+Downloads via `hf-mirror.com` — **works through corporate proxy**:
+
+```bash
+bash download-tts-models.sh snac # just SNAC (~76 MB)
+```
+
+Or manually:
+
+```bash
+mkdir -p models/snac_24khz
+curl -k -sL -o models/snac_24khz/config.json \
+ "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json"
+curl -k -L --progress-bar -o models/snac_24khz/pytorch_model.bin \
+ "https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
+```
+
+### 4. Qwen3-TTS 0.6B (optional)
+
+```bash
+bash download-tts-models.sh qwen # tokenizer + model (~1.7 GB)
+```
+
+After download everything runs **fully offline**.
+
+---
+
+## Usage
+
+### Orpheus TTS (via Ollama)
+
+```bash
+# Make sure Ollama is running
+ollama serve &
+
+# Run test
+.venv-qwen-tts/bin/python test_orpheus_tts.py
+
+# Play output
+afplay test_orpheus_tara.wav
+```
+
+**Voices:** `tara`, `leah`, `jess`, `leo`, `dan`, `mia`, `zac`, `zoe`
+
+**Emotion tags:** `<laugh>`, `<chuckle>`, `<sigh>`, `<cough>`, `<sniffle>`, `<groan>`, `<yawn>`, `<gasp>`
+
+```python
+# Example prompt format
+voice = "tara"
+text = "<laugh> That's hilarious! Tell me more."
+prompt = f"<|begin_of_text|>{voice}: {text}<|eot_id|>"
+```
+
+### Qwen3-TTS (direct Python)
+
+```bash
+.venv-qwen-tts/bin/python test_qwen_tts.py
+afplay test_output_english.wav
+```
+
+**Features:**
+
+- 10 languages (Chinese, English, Japanese, Korean, German, French, Russian, Portuguese, Spanish, Italian)
+- Built-in speaker voices (Chelsie, Vivian, Ryan, etc.)
+- Natural language emotion control: `instruct="Speak with excitement"`
+- Voice cloning from a short audio sample (with Base model variant)
+
+---
+
+## File Inventory
+
+```
+__LOCAL_LLMs/
+├── setup-tts.sh # ← START HERE — one-shot setup for fresh laptop
+├── download-tts-models.sh # Download model weights (uses hf-mirror.com)
+├── test_orpheus_tts.py # Orpheus TTS test (Ollama + SNAC)
+├── test_qwen_tts.py # Qwen3-TTS test (direct Python)
+├── .venv-qwen-tts/ # Python 3.12 venv (gitignored, created by setup)
+├── models/ # Downloaded model weights (gitignored)
+│ ├── snac_24khz/ # SNAC audio decoder (~76 MB)
+│ ├── Qwen3-TTS-Tokenizer-12Hz/ # Qwen3-TTS tokenizer (optional)
+│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/ # Qwen3-TTS model (~1.2 GB, optional)
+└── *.wav # Generated audio output (gitignored)
+```
+
+---
+
+## OSS Speech Model Landscape — STT & TTS (as of Feb 2026)
+
+### Speech-to-Text (STT)
+
+| Model | By | Notes |
+| ------------------------- | ------------------ | --------------------------------------------------- |
+| **Whisper / whisper-cpp** | OpenAI / ggerganov | Gold standard, already installed, Metal-accelerated |
+| **Faster Whisper** | SYSTRAN | 4× faster via CTranslate2 |
+| **Distil-Whisper** | Hugging Face | 6× faster, 49% fewer params |
+
+### Text-to-Speech (TTS)
+
+| Model | By | Size | Notes |
+| ---------------- | ------------ | --------- | ------------------------------------------------------- |
+| **Qwen3-TTS** ⭐ | Alibaba | 0.6B–1.7B | Best quality, 10 languages, voice cloning, Jan 2026 |
+| **Orpheus TTS** | Canopy AI | 3B | Expressive, 8 voices, emotion tags, available on Ollama |
+| **Kokoro** | HF Community | 82M | Very fast, near-commercial quality, Apache 2.0 |
+| **Piper** | Rhasspy | ONNX | Lightweight, runs on Raspberry Pi |
+| **F5-TTS** | SWivid | — | Zero-shot voice cloning, flow matching |
+| **StyleTTS 2** | Columbia U | — | Human-level quality, style diffusion |
+| **OuteTTS** | Community | — | Pure LLM-based TTS, runs via llama.cpp |
+| **Bark** | Suno | — | Speech + music + sound effects |
+
+---
+
+## Corporate Proxy Notes
+
+| Source | Status | Workaround |
+| ------------------------------------------ | ---------- | --------------------------------------------------- |
+| **Ollama registry** (`registry.ollama.ai`) | ✅ Works | Ollama pull uses its own CDN |
+| **PyPI** (via `artifact.it.att.com`) | ✅ Works | Corporate Artifactory mirror |
+| **GitHub releases** | ✅ Works | Direct download |
+| **HuggingFace** (`huggingface.co`) | ❌ Blocked | Use `hf-mirror.com` as mirror (works through proxy) |
+| **hf-mirror.com** (HF mirror) | ✅ Works | Chinese HF mirror, not blocked by Forcepoint |
+
+Forcepoint CSO intercepts HTTPS and serves a block page for HuggingFace. No SSL workaround works for `huggingface.co`. However, **`hf-mirror.com`** (a Chinese mirror of HuggingFace) is **not blocked** and can be used to download model weights:
+
+```bash
+# Download SNAC config + weights via mirror
+curl -k -L -o models/snac_24khz/config.json "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json"
+curl -k -L -o models/snac_24khz/pytorch_model.bin "https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
+```
+
+All other sources (Ollama, pip, GitHub) also work fine through the proxy.
+
+---
+
+## Troubleshooting
+
+| Problem | Fix |
+| --------------------------------------------- | ----------------------------------------------------------------------------- |
+| `OSError: couldn't connect to huggingface.co` | Use `hf-mirror.com` or run `bash setup-tts.sh` |
+| `SNAC decoder not found` | Run `bash setup-tts.sh` or `bash download-tts-models.sh snac` |
+| `Model not found at models/Qwen3-TTS-*` | Run `bash setup-tts.sh` or `bash download-tts-models.sh qwen` |
+| Orpheus generates no audio tokens | Ensure `ollama serve` is running and `ollama list` shows `sematre/orpheus:en` |
+| MPS out of memory for Qwen3-TTS | Close other apps (Windsurf uses ~18 GB). Or use `device="cpu"` in test script |
+| Slow generation on CPU | Expected for 0.6B model. MPS should be ~2-3× faster |
diff --git a/__LOCAL_LLMs/docs/DASHBOARD_REVIEW.md b/__LOCAL_LLMs/docs/DASHBOARD_REVIEW.md
deleted file mode 100644
index 6948dae1..00000000
--- a/__LOCAL_LLMs/docs/DASHBOARD_REVIEW.md
+++ /dev/null
@@ -1,310 +0,0 @@
-# Mission Control Dashboard — Bug & Improvement Review
-
-> Systematic code review of `__LOCAL_LLMs/dashboard/` (6 source files, 1,395 lines)
-> Last updated: Feb 19, 2026
-
----
-
-## File Inventory
-
-| File | Lines | Purpose |
-| ------------------------------------ | ----- | -------------------------------------------------------------------- |
-| `src/app/page.tsx` | 1,079 | Main dashboard UI (single component) |
-| `src/app/globals.css` | 91 | Design tokens, animations, base styles |
-| `src/app/layout.tsx` | 20 | Root layout (metadata, dark mode) |
-| `src/app/api/ollama/route.ts` | 117 | Ollama REST proxy (list, load, unload, pull, delete, show, generate) |
-| `src/app/api/ollama/stream/route.ts` | 38 | Ollama streaming generate proxy (NDJSON) |
-| `src/app/api/whisper/route.ts` | 66 | Whisper binary + GGML model discovery |
-| `src/app/api/system/route.ts` | 162 | System info (chip, memory via vm_stat, disk, brew) |
-
-**Stack:** Next.js 16, React 19, TailwindCSS v4, Lucide icons, TypeScript
-
----
-
-## 1. Bugs
-
-- [x] **B1. Hardcoded machine specs in header** — `page.tsx:317`
- Subtitle reads `Apple M4 Pro · 48 GB · {system?.platform}` — should use `system?.chip` and `formatBytes(system?.memory.total)` dynamically so it works on any machine.
-
-- [x] **B2. Pull model blocks UI — no progress feedback** — `api/ollama/route.ts:84-92`
- `handlePull` calls Ollama with `stream: false`. Large models (20+ GB) block for 30+ minutes. The Next.js API route will likely timeout. Must use `stream: true` and pipe progress events to the client. _(Combined with F1.)_
-
-- [x] **B3. Dead code: non-streaming `generate` action** — `api/ollama/route.ts:69-82`
- The `action === 'generate'` handler is unused — UI only uses `/api/ollama/stream`. Remove or keep as fallback with a comment.
-
-- [x] **B4. Escape key closes modal during active streaming** — `page.tsx:188-197`
- Global `keydown` handler calls `setPromptModel(null)` unconditionally. Backdrop click correctly checks `!promptLoading`. Escape should also respect `promptLoading` to prevent discarding an in-flight response.
-
-- [x] **B5. Auto-refresh (15s) fires during streaming/pull** — `page.tsx:182-185`
- `setInterval(fetchAll, 15000)` runs unconditionally. During streaming this causes background churn and potential UI flicker. Should pause while `promptLoading` or `pullLoading` is true.
-
-- [x] **B6. Toast ID collision on HMR remount** — `page.tsx:156-159`
- `toastId.current` resets to 0 on component remount during dev. Use `Date.now()` or `crypto.randomUUID()` for robust uniqueness.
-
-- [x] **B7. vm_stat page size hardcoded** — `api/system/route.ts:103`
- Hardcoded `16384`. Should parse from vm_stat's first line: `"(page size of NNNNN bytes)"` for portability.
-
-- [x] **B8. Whisper models dir not configurable** — `api/whisper/route.ts:24`
- Hardcoded to `~/whisper-models`. Should scan multiple known paths (`/opt/homebrew/share/whisper-cpp/models/`, `~/whisper-models`, `~/.cache/whisper/`) or accept `WHISPER_MODELS_DIR` env var.
-
-- [x] **B9. No AbortController for streaming fetch** — `page.tsx:250-289`
- Closing the prompt modal doesn't cancel the underlying fetch. The `reader.read()` loop continues in the background wasting CPU/bandwidth until the model finishes generating.
-
-- [x] **B10. Brew shows "Loading..." when array is empty** — `page.tsx:936-940`
- When `system.brewPackages` is `[]` (all uninstalled), displays "Loading..." instead of "No packages found". Needs to distinguish "still fetching" vs "fetched but empty".
-
-- [x] **B11. Prompt text not cleared on close without send** — `page.tsx:951-957`
- Backdrop click clears `promptText`, but Escape handler (B4 fix) should also clear it. Otherwise stale text persists when re-opening.
-
----
-
-## 2. Code Quality
-
-- [x] **CQ1. Monolithic 1,079-line single component** — `page.tsx`
- All interfaces, utilities, sub-components, and 900+ lines of JSX in one file. Extract to:
- - `components/` — StatusDot, ProgressBar, ToastContainer, PromptModal, OllamaModelsPanel, SystemPanel, WhisperPanel, BrewPanel
- - `lib/types.ts` — interfaces (OllamaModel, SystemData, etc.)
- - `lib/format.ts` — formatBytes, formatUptime
- - `lib/hooks.ts` — useAutoRefresh, useToasts, useOllamaActions
-
-- [x] **CQ2. Pervasive inline styles instead of CSS/Tailwind classes** — `page.tsx` (100+ occurrences)
- Every `style={{ color: 'var(--text-tertiary)' }}` should be a utility class. Options: custom Tailwind theme mapping, or CSS utility classes in `globals.css` (e.g., `.text-muted`).
-
-- [x] **CQ3. OLLAMA_URL duplicated** — `api/ollama/route.ts:3` + `api/ollama/stream/route.ts:3`
- Same `process.env.OLLAMA_URL || 'http://localhost:11434'` in two files. Extract to `lib/ollama-config.ts`.
-
-- [x] **CQ4. No React Error Boundary** — `page.tsx`
- Unexpected API response shape crashes the entire dashboard. Add an `error.tsx` (Next.js App Router convention) for graceful recovery.
-
-- [x] **CQ5. No loading skeleton / shimmer UI**
- Initial load shows "..." placeholders. Skeleton cards would be more polished.
-
-- [x] **CQ6. No TypeScript strict null checks in API responses**
- API route handlers catch errors but return loosely typed JSON. Add Zod validation on the Ollama/system responses to prevent runtime surprises.
-
----
-
-## 3. Features
-
-- [x] **F1. Streaming pull with progress bar** _(fixes B2)_
- Use Ollama `stream: true` for `/api/pull`. Create `/api/ollama/pull/route.ts` that pipes NDJSON progress. UI shows progress bar with `completed/total` bytes, speed, and ETA.
-
-- [x] **F2. Model search/filter**
- Search input above models list. Filter by name, family, quantization. Useful when 10+ models are installed.
-
-- [x] **F3. Prompt history (localStorage)**
- Store last 20 prompts with model name + timestamp. Dropdown in prompt modal to re-run previous prompts.
-
-- [x] **F4. Chat mode (multi-turn conversation)**
- Use Ollama `/api/chat` instead of `/api/generate`. Chat bubble layout with message history. System prompt input field.
-
-- [x] **F5. Model comparison (side-by-side)**
- Send same prompt to 2 models simultaneously. Display responses side-by-side with latency/quality comparison.
-
-- [x] **F6. Token/s metrics after generation**
- Parse `eval_count` and `eval_duration` from the final NDJSON chunk. Display tokens/second, total tokens, and latency in the response footer.
-
-- [x] **F7. System resource sparklines (time-series)**
- Ring buffer of memory/CPU snapshots (localStorage). Render mini sparkline charts in the System panel. Spot trends over time.
-
-- [x] **F8. Ollama server logs viewer**
- Read `~/.ollama/logs/` and display in a collapsible terminal-style panel. Filter by level. Auto-scroll.
-
-- [x] **F9. Modelfile / template viewer**
- The `show` action already fetches Modelfile, template, and system prompt. Display in a collapsible code block in expanded model details.
-
-- [x] **F10. Dark/light theme toggle**
- Add `:root.light` CSS variable overrides. Theme toggle with localStorage persistence. Current architecture supports this natively.
-
-- [x] **F11. Keyboard shortcuts panel (`?` key)**
- Show all shortcuts in a modal: ⌘+Enter (send), Esc (close), R (refresh), / (search models), ? (help).
-
-- [x] **F12. Whisper transcription test**
- Upload/record a short audio clip, transcribe locally via whisper-cli, display result with latency. Tests the full local STT pipeline.
-
-- [x] **F13. Responsive mobile layout**
- Better breakpoints for the 4-column stats row and 3-column main grid. Collapsible sidebar on mobile.
-
-- [x] **F14. Model tags/labels (localStorage)**
- User-defined tags (coding, fast, vision) with colored badges. Persisted in localStorage.
-
-- [x] **F15. Extraction service integration panel**
- Show extraction-service (port 4005) health status. Run test extractions against loaded Ollama models. Bridges dashboard to LysnrAI pipeline.
-
-- [x] **F16. Auto-load preferred model**
- Mark a model as "auto-load" (stored in localStorage). When Ollama is online but no models loaded, auto-load the preferred model.
-
----
-
-## 4. Performance & Reliability
-
-- [x] **P1. No request deduplication on Refresh** — `page.tsx:164-176`
- Rapid clicks on Refresh fire duplicate `fetchAll()` calls. Add a `fetchingRef` guard or disable the button during fetch (partially done for `actionLoading` but not for `fetchAll`).
-
-- [x] **P2. Static cache never expires** — `api/system/route.ts:81-90`
- `staticCache` (chip, GPU, brew) lives forever in the server process. Brew package upgrades won't reflect. Add 5-minute TTL.
-
-- [x] **P3. `du -sk ~/.ollama/models` on every refresh** — `api/system/route.ts:41`
- Traverses entire models directory every 15 seconds. Cache with 60-second TTL.
-
-- [x] **P4. No fetch timeout on Ollama calls** — `api/ollama/route.ts:5-12`
- `fetchOllama` has no `AbortSignal` or timeout. If Ollama hangs, the dashboard hangs. Add 5-second timeout.
-
-- [x] **P5. `system_profiler` slow on first load** — `api/system/route.ts:52-53`
- Takes ~2-3 seconds. Cached after first call, but first dashboard load waits. Consider eager background fetch on server start or return placeholder.
-
----
-
-## 5. Security & Hardening
-
-- [x] **S1. No input validation on model names** — `api/ollama/route.ts:50-51`
- `model` from request body passed directly to Ollama. Add regex validation: `^[a-zA-Z0-9._:/-]{1,256}$`.
-
-- [x] **S2. Shell command interpolation pattern** — `api/system/route.ts:67`
- `execAsync(\`brew list --versions ${pkg}\`)`— safe today (hardcoded targets) but fragile. Use`execFile('brew', ['list', '--versions', pkg])` for safety.
-
-- [x] **S3. No CORS or auth** _(acceptable for local-only, documented)_
- Any local process can call API routes. Fine for dev tool; document the assumption.
-
----
-
-## 6. Implementation Tracker
-
-### Sprint 1 — Critical Bug Fixes _(est. 1–2 hrs)_
-
-| # | ID | Task | Effort | Commit |
-| --- | --------- | ----------------------------------------- | ------ | --------- |
-| 1 | - [x] B4 | Guard Escape key during streaming | 5 min | `2da67c2` |
-| 2 | - [x] B5 | Pause auto-refresh during prompt/pull | 10 min | `2da67c2` |
-| 3 | - [x] B9 | Add AbortController to streaming fetch | 15 min | `2da67c2` |
-| 4 | - [x] B1 | Dynamic chip/RAM in header | 5 min | `2da67c2` |
-| 5 | - [x] B11 | Clear prompt text on Escape close | 5 min | `2da67c2` |
-| 6 | - [x] P4 | Add timeout to Ollama fetch calls | 10 min | `2da67c2` |
-| 7 | - [x] B3 | Remove dead generate action (or document) | 5 min | `2da67c2` |
-| 8 | - [x] B6 | Use Date.now() for toast IDs | 2 min | `2da67c2` |
-| 9 | - [x] B10 | Fix brew "Loading..." vs "empty" state | 5 min | `2da67c2` |
-
-### Sprint 2 — Pull Progress + Metrics _(est. 2–3 hrs)_
-
-| # | ID | Task | Effort | Commit |
-| --- | ----------- | ----------------------------------- | ------ | --------- |
-| 10 | - [x] B2+F1 | Streaming pull with progress bar | 60 min | `2d9475b` |
-| 11 | - [x] F6 | Display tokens/s after generation | 30 min | `2d9475b` |
-| 12 | - [x] B7 | Parse vm_stat page size dynamically | 10 min | `2d9475b` |
-| 13 | - [x] B8 | Multi-path whisper model discovery | 15 min | `2d9475b` |
-
-### Sprint 3 — Component Refactor _(est. 2–3 hrs)_
-
-| # | ID | Task | Effort | Commit |
-| --- | --------- | --------------------------------------- | ------ | --------- |
-| 14 | - [x] CQ1 | Extract components into separate files | 90 min | `75a3cd0` |
-| 15 | - [x] CQ4 | Add error.tsx Error Boundary | 15 min | `75a3cd0` |
-| 16 | - [x] CQ3 | Shared ollama-config.ts | 10 min | `75a3cd0` |
-| 17 | - [x] CQ2 | Consolidate inline styles → CSS classes | 45 min | `ed93a6f` |
-| 18 | - [x] S1 | Add model name input validation | 10 min | `75a3cd0` |
-| 19 | - [x] S2 | Replace exec → execFile for brew | 10 min | `75a3cd0` |
-
-### Sprint 4 — UX Enhancements _(est. 3–4 hrs)_
-
-| # | ID | Task | Effort | Commit |
-| --- | --------- | ------------------------------------ | ------ | --------- |
-| 20 | - [x] F3 | Prompt history (localStorage) | 45 min | `9c2f5f3` |
-| 21 | - [x] F9 | Modelfile viewer in expanded details | 30 min | `9c2f5f3` |
-| 22 | - [x] F4 | Chat mode (multi-turn via /api/chat) | 90 min | `ed93a6f` |
-| 23 | - [x] F2 | Model search/filter | 30 min | `9c2f5f3` |
-| 24 | - [x] F11 | Keyboard shortcuts panel | 20 min | `9c2f5f3` |
-
-### Sprint 5 — Integration & Polish _(est. 2–3 hrs)_
-
-| # | ID | Task | Effort | Commit |
-| --- | ----------- | -------------------------- | ------ | --------- |
-| 25 | - [x] F15 | Extraction service panel | 60 min | `8bdd5ee` |
-| 26 | - [x] F12 | Whisper transcription test | 45 min | `8bdd5ee` |
-| 27 | - [x] F7 | System resource sparklines | 45 min | `8bdd5ee` |
-| 28 | - [x] CQ5 | Loading skeleton UI | 20 min | `8bdd5ee` |
-| 29 | - [x] P1-P3 | Request dedup + cache TTLs | 30 min | `b1fda3a` |
-| 30 | - [x] F16 | Auto-load preferred model | 20 min | `ed93a6f` |
-
-### Deferred (nice-to-have)
-
-| ID | Task | Notes |
-| --------- | ------------------------------- | --------- |
-| - [x] F5 | Model comparison (side-by-side) | `8bdd5ee` |
-| - [x] F10 | Dark/light theme toggle | `ed93a6f` |
-| - [x] F13 | Responsive mobile layout | `8bdd5ee` |
-| - [x] F14 | Model tags/labels | `ed93a6f` |
-| - [x] CQ6 | Zod validation on API responses | `ed93a6f` |
-| - [x] F8 | Ollama server logs viewer | `8bdd5ee` |
-| - [x] S3 | CORS / auth (documented) | `8bdd5ee` |
-
----
-
-## 7. Commit Log
-
-_Commits will be added here as work progresses._
-
-| # | Date | Commit | Sprint | Items Completed |
-| --- | ------ | --------- | -------- | ------------------------------------ |
-| 1 | Feb 19 | `2da67c2` | Sprint 1 | B1, B3, B4, B5, B6, B9, B10, B11, P4 |
-| 2 | Feb 19 | `2d9475b` | Sprint 2 | B2, B7, B8, F1, F6 |
-| 3 | Feb 19 | `75a3cd0` | Sprint 3 | CQ1, CQ3, CQ4, S1, S2 |
-| 4 | Feb 19 | `9c2f5f3` | Sprint 4 | F2, F3, F9, F11 |
-| 5 | Feb 19 | `b1fda3a` | Sprint 5 | P1, P2, P3 |
-| 6 | Feb 19 | `ed93a6f` | Sprint 6 | CQ2, CQ6, P5, F4, F10, F14, F16 |
-| 7 | Feb 19 | `8bdd5ee` | Sprint 7 | F5, F7, F8, F12, F13, F15, CQ5, S3 |
-
----
-
-> **39 items total:** 11 bugs, 6 code quality, 16 features, 5 performance, 3 security
-> **All 39 items completed** across 7 sprints (9 code commits + doc updates)
-> **Actual total effort:** ~8 hours across 7 sprints
-
----
-
-## 8. Next Wave — Model Intelligence & Pre-Load Metrics
-
-> Proposed improvements focused on helping users make informed decisions **before** loading a model.
-
-### Tier A — Pre-Load Decision Metrics _(est. 45 min)_
-
-| ID | Feature | Description |
-| --- | ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- |
-| N1 | **Estimated RAM per model** | Approximate from disk size: Q4_K_M ≈ 1.2×disk in RAM. Show on every model card (e.g., `~22 GB RAM`), not just running models. |
-| N2 | **"Will it fit?" indicator** | Compare estimated RAM vs `system.memory.free + cached`. Color-code: 🟢 Fits, 🟡 Tight (80–100%), 🔴 Won't fit. Show on Load button or as badge. |
-| N3 | **Aggregate loaded model RAM** | Sum VRAM of all running models. Display at top of models panel: "3 models loaded · 28.5 GB VRAM". |
-
-### Tier B — Rich Model Metadata _(est. 60 min)_
-
-| ID | Feature | Description |
-| --- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------- | ---------------- | ------------------------------------------ |
-| N4 | **RAM budget bar** | Horizontal stacked bar: `[OS+Apps | Model A (loaded) | Model B (loaded) | Free]`. Instant visual of memory headroom. |
-| N5 | **Context window size** | Fetch `context_length` from Ollama `/api/show` → `model_info`. Display on card (e.g., `128k ctx`). Critical for knowing max prompt length. |
-
-### Tier C — Model Intelligence Badges _(est. 45 min)_
-
-| ID | Feature | Description |
-| --- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
-| N6 | **`<think>` warning badge** | If model is DeepSeek R1 family, show ⚠️ badge: "Emits `<think>` traces — strip before JSON.parse". Prevents silent JSON failures. |
-| N7 | **Vision model indicator** | If model is multimodal (llava, qwen2.5vl), show 👁 badge. These need image input — text-only prompts are suboptimal. |
-| N8 | **Architecture badge** | Show model arch (llama, qwen2, phi3, deepseek2) as subtle pill on the card. Currently buried in expanded details. |
-| N9 | **Sort/order models** | Dropdown to sort by: name, size, parameters, running status, last modified. Currently uses Ollama's default order. |
-| N10 | **Ollama version display** | Call `/api/version`. Show in Ollama status card. Useful for debugging model compatibility. |
-
-### Tier D — Runtime Metrics & UX _(est. 30 min)_
-
-| ID | Feature | Description |
-| --- | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| N11 | **Last known tok/s per model** | Persist `StreamMetrics.tokensPerSec` in localStorage keyed by model. Show on card (e.g., `~45 tok/s`). Compare speeds without re-benchmarking. |
-| N12 | **Auto-unload countdown** | Replace static `Expires: 3:45 PM` with live countdown: `Unloads in 4m 32s`. More actionable. |
-| N13 | **Session stats per model** | Track prompts sent + tokens generated per model in session. Show in expanded details. |
-| N14 | **Delete confirmation + reclaim** | Show "Delete qwen2.5-coder:32b? Reclaim 18.5 GB disk." before deleting. Currently no confirmation. |
-| N15 | **Simultaneous load suggestions** | Based on available RAM, suggest which models can be co-loaded. E.g., "Can co-load llama3.1:8b + qwen2.5-coder:32b (28 GB, 20 GB free)". |
-
-### Implementation Plan
-
-| Sprint | Items | Focus | Effort |
-| ------ | ----------------------- | ------------------------ | ------- |
-| 8 | N1, N2, N3 | Pre-load RAM estimates | ~45 min |
-| 9 | N4, N5 | RAM bar + context window | ~60 min |
-| 10 | N6, N7, N8, N9, N10 | Badges + sort + version | ~45 min |
-| 11 | N11, N12, N13, N14, N15 | Runtime metrics + UX | ~30 min |
diff --git a/__LOCAL_LLMs/docs/README.md b/__LOCAL_LLMs/docs/README.md
index 95d76514..9c1cb8e1 100644
--- a/__LOCAL_LLMs/docs/README.md
+++ b/__LOCAL_LLMs/docs/README.md
@@ -2,7 +2,7 @@
> Complete guide for the local AI inference stack on the ByteLyst development machine.
> Hardware: **Apple M4 Pro · 48 GB LPDDR5 · macOS Tahoe**
-> Last updated: 2026-02-19
+> Last updated: 2026-02-21
---
@@ -16,8 +16,11 @@ ollama serve # or: brew services start ollama
ollama run qwen2.5-coder:32b # best coding model for this hardware
# 3. Launch Mission Control dashboard
-cd __LOCAL_LLMs/dashboard && npm run dev -- -p 3100
-# Open http://localhost:3100
+cd __LOCAL_LLMs/dashboard && npm run dev
+# Open http://localhost:3000
+
+# 4. (Optional) Set up TTS
+cd __LOCAL_LLMs && bash setup-tts.sh
```
---
@@ -35,6 +38,7 @@ cd __LOCAL_LLMs/dashboard && npm run dev -- -p 3100
| 07 | [Model Recommendations](07-model-recommendations.md) | Tiered model guide by use case, size, and quality for M4 Pro 48GB |
| 08 | [Troubleshooting & Corporate Proxy](08-troubleshooting.md) | Common issues, Forcepoint proxy workarounds, MLX warnings |
| 09 | [Environment Variables](09-environment-variables.md) | All config vars for Ollama, Whisper, dashboard, evals |
+| 10 | [Text-to-Speech](10-text-to-speech.md) | Orpheus TTS via Ollama, Qwen3-TTS 0.6B, setup, corporate proxy |
---
@@ -53,28 +57,42 @@ __LOCAL_LLMs/
│ ├── 06-extraction-service-evals.md
│ ├── 07-model-recommendations.md
│ ├── 08-troubleshooting.md
-│ └── 09-environment-variables.md
-├── dashboard/ ← Next.js Mission Control app (port 3100)
-│ ├── src/app/page.tsx ← main dashboard UI
+│ ├── 09-environment-variables.md
+│ └── 10-text-to-speech.md
+├── dashboard/ ← Next.js Mission Control app (port 3000)
+│ ├── src/app/(mission-control)/ ← Mission Control page + memory drilldown
│ ├── src/app/api/ollama/route.ts ← Ollama API proxy (list, load, unload, generate)
│ ├── src/app/api/whisper/route.ts ← Whisper binary/model discovery
-│ └── src/app/api/system/route.ts ← System info (chip, RAM, disk, brew)
+│ ├── src/app/api/system/route.ts ← System info (chip, RAM, disk, brew)
+│ └── src/app/api/system/memory/route.ts ← Memory drilldown (vm_stat + top processes)
+├── setup-tts.sh ← One-shot TTS setup for fresh laptop
+├── download-tts-models.sh ← Download model weights (uses hf-mirror.com)
+├── test_orpheus_tts.py ← Orpheus TTS test (Ollama + SNAC decoder)
+├── test_qwen_tts.py ← Qwen3-TTS 0.6B test (direct Python, MPS/CPU)
+├── .venv-qwen-tts/ ← Python 3.12 venv for TTS (gitignored)
+├── models/ ← Downloaded TTS model weights (gitignored)
└── LOCAL_LLMs_setup_mac_m4_48gb.md ← original doc (preserved, see docs/ for latest)
```
---
-## Current Installation Status (2026-02-19)
+## Current Installation Status (2026-02-21)
-| Component | Version | Status | Disk Usage |
-| ----------------------------------- | ---------- | ----------------------------- | ---------- |
-| Ollama | 0.16.2 | ✅ Installed via brew | — |
-| qwen2.5-coder:32b | — | ✅ Downloaded | 19 GB |
-| llama3.1:8b | — | ✅ Downloaded | 4.9 GB |
-| whisper-cpp | 1.8.3 | ✅ Installed via brew | 9.6 MB |
-| whisper model (ggml-large-v3-turbo) | — | ❌ Blocked by corporate proxy | — |
-| ffmpeg | 8.0.1 | ✅ Installed via brew | 53.3 MB |
-| Mission Control Dashboard | Next.js 16 | ✅ Built, runs on :3100 | — |
+| Component | Version | Status | Disk Usage |
+| ----------------------------------- | ---------- | ------------------------------------------ | ---------- |
+| Ollama | 0.16.2 | ✅ Installed via brew | — |
+| qwen2.5-coder:32b | — | ✅ Downloaded | 19 GB |
+| qwen2.5-coder:7b | — | ✅ Downloaded | 4.7 GB |
+| deepseek-r1:32b | — | ✅ Downloaded | 19 GB |
+| llama3.1:8b | — | ✅ Downloaded | 4.9 GB |
+| sematre/orpheus:en (TTS) | — | ✅ Downloaded via Ollama | 4 GB |
+| whisper-cpp | 1.8.3 | ✅ Installed via brew | 9.6 MB |
+| whisper model (ggml-large-v3-turbo) | — | ✅ Downloaded via hf-mirror.com | 1.5 GB |
+| ffmpeg | 8.0.1 | ✅ Installed via brew | 53.3 MB |
+| Python 3.12 (TTS venv) | 3.12.12 | ✅ Installed via brew + venv created | ~2 GB |
+| SNAC decoder (TTS) | — | ✅ Downloaded via hf-mirror.com | 76 MB |
+| Qwen3-TTS 0.6B | — | ✅ Downloaded via hf-mirror.com | 1.7 GB |
+| Mission Control Dashboard | Next.js 16 | ✅ Built, runs on :3000 (memory drilldown) | — |
---
diff --git a/__LOCAL_LLMs/download-tts-models.sh b/__LOCAL_LLMs/download-tts-models.sh
new file mode 100755
index 00000000..d150a50f
--- /dev/null
+++ b/__LOCAL_LLMs/download-tts-models.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# ============================================================
+# Download TTS Model Weights
+#
+# Downloads SNAC decoder + Qwen3-TTS from HuggingFace.
+# Uses hf-mirror.com which works through corporate proxy.
+# Falls back to huggingface.co if mirror is unreachable.
+#
+# No Python venv required — uses curl only.
+#
+# Usage:
+# bash download-tts-models.sh # download all
+# bash download-tts-models.sh snac # SNAC decoder only
+# bash download-tts-models.sh qwen # Qwen3-TTS only
+# ============================================================
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+MODELS_DIR="$SCRIPT_DIR/models"
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+NC='\033[0m'
+ok() { echo -e "${GREEN}✓${NC} $1"; }
+fail() { echo -e "${RED}✗${NC} $1"; exit 1; }
+
+echo "=== TTS Model Downloader ==="
+echo ""
+
+# ── Pick HuggingFace source ─────────────────────────────────
+# Probe hf-mirror.com first (reachable through the corporate proxy; -k skips TLS verification).
+# Fall back to huggingface.co; -f there makes an HTTP error (e.g. a proxy block page) count as unreachable.
+HF_BASE=""
+echo "Testing hf-mirror.com..."
+if curl -k -s --max-time 5 "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json" | python3 -c "import sys,json; json.load(sys.stdin)" &>/dev/null; then
+  HF_BASE="https://hf-mirror.com"
+  ok "Using hf-mirror.com (works through corporate proxy)"
+else
+  echo "Mirror unavailable. Testing huggingface.co..."
+  if curl -fs --max-time 5 "https://huggingface.co/api/models/hubertsiuzdak/snac_24khz" -o /dev/null 2>/dev/null; then
+    HF_BASE="https://huggingface.co"
+    ok "Using huggingface.co directly"
+  else
+    fail "Cannot reach hf-mirror.com or huggingface.co. If on corporate network, try from home WiFi."
+  fi
+fi
+echo ""
+
+mkdir -p "$MODELS_DIR"
+
+# ── Helper: download with validation ────────────────────────
+download_file() {
+  local URL="$1"
+  local DEST="$2"
+  local DESC="$3"
+
+  echo " Downloading $DESC..."
+  curl -k -L --progress-bar -o "$DEST" "$URL"
+
+  # Reject an HTML response (e.g. a proxy block page) that was saved in place of the real file
+  FILE_HEAD=$(head -c 50 "$DEST" 2>/dev/null)
+  if echo "$FILE_HEAD" | grep -qiE "<html|<!doctype"; then
+    rm -f "$DEST"
+    fail "$DESC looks like an HTML page (proxy block page?). Try from home network."
+  fi
+}
+
+# ── 1. SNAC decoder ─────────────────────────────────────────
+download_snac() {
+  echo "=== [SNAC] 24kHz audio decoder (~76 MB) ==="
+  mkdir -p "$MODELS_DIR/snac_24khz"
+
+  if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
+    SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || echo 0)
+    if [ "$SIZE" -gt 1000000 ]; then
+      ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
+      echo ""
+      return
+    fi
+  fi
+
+  download_file "$HF_BASE/hubertsiuzdak/snac_24khz/raw/main/config.json" \
+    "$MODELS_DIR/snac_24khz/config.json" "config.json"
+
+  download_file "$HF_BASE/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin" \
+    "$MODELS_DIR/snac_24khz/pytorch_model.bin" "pytorch_model.bin (~76 MB)"
+
+  ok "SNAC decoder downloaded"
+  echo ""
+}
+
+# ── 2. Qwen3-TTS Tokenizer ──────────────────────────────────
+download_qwen_tokenizer() {  # fetch tokenizer configs + weights; skipped when weights > 100 MB already exist
+  echo "=== [Qwen3-TTS] Tokenizer (~650 MB) ==="
+  local SIZE f DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"  # SIZE and f are local so they do not leak to the caller
+  mkdir -p "$DIR"
+
+  if [ -f "$DIR/model.safetensors" ]; then
+    SIZE=$(stat -f%z "$DIR/model.safetensors" 2>/dev/null || stat -c%s "$DIR/model.safetensors" 2>/dev/null || echo 0)  # BSD stat, then GNU stat
+    if [ "$SIZE" -gt 100000000 ]; then
+      ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
+      echo ""
+      return
+    fi
+  fi
+
+  for f in config.json configuration.json preprocessor_config.json; do
+    download_file "$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f" \
+      "$DIR/$f" "$f"
+  done
+
+  download_file "$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors" \
+    "$DIR/model.safetensors" "model.safetensors (~650 MB)"
+
+  ok "Qwen3-TTS Tokenizer downloaded"
+  echo ""
+}
+
+# ── 3. Qwen3-TTS 0.6B model ─────────────────────────────────
+download_qwen_model() {  # fetch model configs + weights; skipped when weights > 100 MB already exist
+  echo "=== [Qwen3-TTS] 0.6B CustomVoice (~1.2 GB) ==="
+  local SIZE f DIR="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"  # SIZE and f are local so they do not leak to the caller
+  mkdir -p "$DIR"
+
+  if [ -f "$DIR/model.safetensors" ]; then
+    SIZE=$(stat -f%z "$DIR/model.safetensors" 2>/dev/null || stat -c%s "$DIR/model.safetensors" 2>/dev/null || echo 0)  # BSD stat, then GNU stat
+    if [ "$SIZE" -gt 100000000 ]; then
+      ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
+      echo ""
+      return
+    fi
+  fi
+
+  for f in config.json generation_config.json; do
+    download_file "$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f" \
+      "$DIR/$f" "$f"
+  done
+
+  download_file "$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors" \
+    "$DIR/model.safetensors" "model.safetensors (~1.2 GB)"
+
+  ok "Qwen3-TTS 0.6B downloaded"
+  echo ""
+}
+
+# ── Run downloads (first argument selects the set; default: all) ─
+case "${1:-all}" in
+  snac)
+    download_snac
+    ;;
+  qwen)
+    download_qwen_tokenizer
+    download_qwen_model
+    ;;
+  all)
+    download_snac
+    download_qwen_tokenizer
+    download_qwen_model
+    ;;
+  *)
+    echo "Usage: bash download-tts-models.sh [snac|qwen|all]" >&2  # usage errors go to stderr
+    exit 1
+    ;;
+esac
+
+# ── Summary (absolute paths so the printed commands work from any directory) ─
+echo "=== Downloads complete ==="
+echo ""
+echo "Disk usage:"
+du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
+echo ""
+echo "Test commands:"
+echo " $SCRIPT_DIR/.venv-qwen-tts/bin/python $SCRIPT_DIR/test_orpheus_tts.py # Orpheus via Ollama"
+echo " $SCRIPT_DIR/.venv-qwen-tts/bin/python $SCRIPT_DIR/test_qwen_tts.py # Qwen3-TTS direct"
diff --git a/__LOCAL_LLMs/setup-tts.sh b/__LOCAL_LLMs/setup-tts.sh
new file mode 100755
index 00000000..852c7e0a
--- /dev/null
+++ b/__LOCAL_LLMs/setup-tts.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# ============================================================
+# TTS Setup — One-Shot Script for Fresh Laptop
+#
+# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
+# on Apple Silicon Macs. Works through corporate proxy.
+#
+# What this does:
+# 1. Installs Python 3.12 via Homebrew (if missing)
+# 2. Creates Python venv with TTS packages
+# 3. Pulls Orpheus TTS model via Ollama
+# 4. Downloads SNAC audio decoder via hf-mirror.com
+# 5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
+#
+# Prerequisites:
+# - macOS with Apple Silicon (M1/M2/M3/M4)
+# - Homebrew installed
+# - Ollama installed (brew install ollama)
+#
+# Usage:
+# bash setup-tts.sh
+#
+# After setup, test with:
+# .venv-qwen-tts/bin/python test_orpheus_tts.py
+# afplay test_orpheus_tara.wav
+# ============================================================
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+VENV="$SCRIPT_DIR/.venv-qwen-tts"
+MODELS_DIR="$SCRIPT_DIR/models"
+
+# HuggingFace mirror that works through corporate proxy
+HF_MIRROR="https://hf-mirror.com"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+ok() { echo -e "${GREEN}✓${NC} $1"; }
+warn() { echo -e "${YELLOW}⚠${NC} $1"; }
+fail() { echo -e "${RED}✗${NC} $1"; exit 1; }
+step() { echo -e "\n${GREEN}=== $1 ===${NC}"; }
+
+echo "╔══════════════════════════════════════════════╗"
+echo "║ TTS Setup — Local Speech Generation ║"
+echo "║ Orpheus TTS (Ollama) + Qwen3-TTS (Python) ║"
+echo "╚══════════════════════════════════════════════╝"
+echo ""
+
+# ── 0. Check prerequisites ──────────────────────────────────
+step "Checking prerequisites"
+
+# Homebrew
+if ! command -v brew &>/dev/null; then
+ fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
+fi
+ok "Homebrew"
+
+# Ollama
+if ! command -v ollama &>/dev/null; then
+ warn "Ollama not found. Installing..."
+ brew install ollama
+fi
+ok "Ollama installed"
+
+# Check if Ollama is running
+if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
+ warn "Ollama not running. Starting..."
+ ollama serve &>/dev/null &
+ sleep 3
+ if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
+ fail "Could not start Ollama. Try manually: ollama serve"
+ fi
+fi
+ok "Ollama running on port 11434"
+
+# Apple Silicon check
+ARCH=$(uname -m)
+if [ "$ARCH" != "arm64" ]; then
+ warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
+fi
+
+# ── 1. Install Python 3.12 ──────────────────────────────────
+step "Python 3.12"
+
+PYTHON_CMD=""
+# Look on PATH first, then in the Apple Silicon and Intel Homebrew bin directories
+for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
+  if command -v "$cmd" &>/dev/null; then
+    PYTHON_CMD="$cmd"
+    break
+  fi
+done
+
+if [ -z "$PYTHON_CMD" ]; then
+  warn "Python 3.12 not found. Installing via Homebrew..."
+  brew install python@3.12
+  PYTHON_CMD="$(brew --prefix)/bin/python3.12"  # prefix differs between Apple Silicon and Intel installs
+fi
+
+PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
+ok "$PYTHON_VER at $PYTHON_CMD"
+
+# ── 2. Create venv ──────────────────────────────────────────
+step "Python virtual environment"
+
+if [ -f "$VENV/bin/python" ]; then
+ ok "Venv exists at $VENV"
+else
+ echo "Creating venv..."
+ "$PYTHON_CMD" -m venv "$VENV"
+ ok "Venv created at $VENV"
+fi
+
+# ── 3. Install Python packages ──────────────────────────────
+step "Python packages"
+
+# Check if snac is installed (quick proxy for all packages)
+if "$VENV/bin/python" -c "import snac" &>/dev/null; then
+ ok "Packages already installed (snac, torch, etc.)"
+else
+ echo "Installing packages (this may take a few minutes)..."
+ "$VENV/bin/pip" install -U pip --quiet
+ "$VENV/bin/pip" install -U snac qwen-tts --quiet
+ ok "Packages installed"
+fi
+
+# ── 4. Pull Orpheus TTS model ───────────────────────────────
+step "Orpheus TTS model (Ollama)"
+
+if ollama list 2>/dev/null | grep -q "orpheus"; then
+ ok "Orpheus TTS already downloaded"
+else
+ echo "Pulling sematre/orpheus:en (4 GB)..."
+ NO_PROXY="ollama.com,registry.ollama.ai" ollama pull sematre/orpheus:en
+ ok "Orpheus TTS downloaded"
+fi
+
+# ── 5. Download SNAC decoder ────────────────────────────────
+step "SNAC 24kHz audio decoder (~76 MB)"
+
+mkdir -p "$MODELS_DIR/snac_24khz"
+
+if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
+  SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || echo 0)  # 0 keeps set -e and the -gt test safe
+  if [ "$SIZE" -gt 1000000 ]; then
+    ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
+  else
+    warn "SNAC file looks corrupted (${SIZE} bytes). Re-downloading..."
+    rm -f "$MODELS_DIR/snac_24khz/pytorch_model.bin"
+  fi
+fi
+
+if [ ! -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
+  echo "Downloading config.json..."
+  curl -k -sL -o "$MODELS_DIR/snac_24khz/config.json" \
+    "$HF_MIRROR/hubertsiuzdak/snac_24khz/raw/main/config.json"
+
+  # Verify config is JSON (not an HTML block page); the path is passed as argv so quotes in it cannot break the Python source
+  if ! python3 -c "import json, sys; json.load(open(sys.argv[1]))" "$MODELS_DIR/snac_24khz/config.json" &>/dev/null; then
+    fail "Downloaded config.json is not valid JSON. The mirror may be blocked. Try from home network."
+  fi
+  ok "config.json downloaded"
+
+  echo "Downloading pytorch_model.bin (~76 MB)..."
+  curl -k -L --progress-bar -o "$MODELS_DIR/snac_24khz/pytorch_model.bin" \
+    "$HF_MIRROR/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
+
+  # Verify it's a real model file (zip/pytorch format), not HTML
+  FILE_TYPE=$(file -b "$MODELS_DIR/snac_24khz/pytorch_model.bin" | head -c 20)
+  if echo "$FILE_TYPE" | grep -qi "html"; then
+    rm -f "$MODELS_DIR/snac_24khz/pytorch_model.bin"
+    fail "Downloaded model is HTML (proxy block page). Try from home network."
+  fi
+  ok "SNAC decoder downloaded"
+fi
+
+# Verify SNAC loads in Python (model directory is passed as sys.argv[1])
+echo "Verifying SNAC decoder loads..."
+if "$VENV/bin/python" -c "
+import sys, snac, torch
+model = snac.SNAC.from_pretrained(sys.argv[1])
+print(f'SNAC: {sum(p.numel() for p in model.parameters())/1e6:.1f}M parameters')
+" "$MODELS_DIR/snac_24khz" 2>/dev/null; then
+  ok "SNAC decoder verified"
+else
+  fail "SNAC decoder failed to load. Delete models/snac_24khz/ and re-run."
+fi
+
+# ── 6. (Optional) Download Qwen3-TTS ────────────────────────
+step "Qwen3-TTS 0.6B (optional, ~1.7 GB total)"
+
+QWEN_TOKENIZER_DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
+QWEN_MODEL_DIR="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"
+
+if [ -f "$QWEN_MODEL_DIR/model.safetensors" ] && [ -f "$QWEN_TOKENIZER_DIR/model.safetensors" ]; then  # weights, not just config.json (which is fetched first)
+  ok "Qwen3-TTS already downloaded"
+else
+  echo "Qwen3-TTS 0.6B requires ~1.7 GB download (tokenizer + model)."
+  echo "This is optional — Orpheus TTS (above) works without it."
+  read -p "Download Qwen3-TTS? [y/N] " -n 1 -r || true  # EOF on stdin must not abort under set -e; treated as "no"
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]]; then
+    # Tokenizer (~650 MB)
+    echo "Downloading Qwen3-TTS Tokenizer (~650 MB)..."
+    mkdir -p "$QWEN_TOKENIZER_DIR"
+    for f in config.json configuration.json preprocessor_config.json; do
+      curl -k -sL -o "$QWEN_TOKENIZER_DIR/$f" \
+        "$HF_MIRROR/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f" 2>/dev/null || true
+    done
+    curl -k -L --progress-bar -o "$QWEN_TOKENIZER_DIR/model.safetensors" \
+      "$HF_MIRROR/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
+    ok "Tokenizer downloaded"
+
+    # Model
+    echo "Downloading Qwen3-TTS 0.6B (~1.2 GB)..."
+    mkdir -p "$QWEN_MODEL_DIR"
+    for f in config.json generation_config.json; do
+      curl -k -sL -o "$QWEN_MODEL_DIR/$f" \
+        "$HF_MIRROR/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f" 2>/dev/null || true
+    done
+    curl -k -L --progress-bar -o "$QWEN_MODEL_DIR/model.safetensors" \
+      "$HF_MIRROR/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
+    ok "Qwen3-TTS 0.6B downloaded"
+  else
+    warn "Skipped. You can re-run this script later to download."
+  fi
+fi
+
+# ── Summary ──────────────────────────────────────────────────
+step "Setup Complete"
+
+echo ""
+echo "Installed components:"
+echo " Orpheus TTS (Ollama): $(ollama list 2>/dev/null | grep orpheus | awk '{print $1}' || echo 'ready')"
+echo " SNAC decoder: $MODELS_DIR/snac_24khz/"
+if [ -f "$QWEN_MODEL_DIR/model.safetensors" ]; then  # weights present, not just the directory or config
+  echo " Qwen3-TTS 0.6B: $QWEN_MODEL_DIR/"
+else
+  echo " Qwen3-TTS 0.6B: (not installed — re-run setup to add)"
+fi
+echo ""
+echo "Disk usage:"
+du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
+echo ""
+echo "Test commands:"
+echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
+echo " afplay $SCRIPT_DIR/test_orpheus_tara.wav"
+if [ -f "$QWEN_MODEL_DIR/model.safetensors" ]; then
+  echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
+fi
+echo ""
+echo "Voices: tara, leah, jess, leo, dan, mia, zac, zoe"
+echo "Emotion: <laugh>, <chuckle>, <sigh>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>"
diff --git a/__LOCAL_LLMs/start-dashboard.sh b/__LOCAL_LLMs/start-dashboard.sh
new file mode 100755
index 00000000..f4404126
--- /dev/null
+++ b/__LOCAL_LLMs/start-dashboard.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# ============================================================
+# Start Mission Control Dashboard + Ollama
+#
+# Usage:
+# bash start-dashboard.sh # start dashboard + ensure Ollama running
+# bash start-dashboard.sh stop # stop dashboard
+# bash start-dashboard.sh status # check status
+# ============================================================
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+DASHBOARD_DIR="$SCRIPT_DIR/dashboard"
+PORT=3000
+OLLAMA_URL="http://localhost:11434"
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+ok() { echo -e "${GREEN}✓${NC} $1"; }
+warn() { echo -e "${YELLOW}⚠${NC} $1"; }
+fail() { echo -e "${RED}✗${NC} $1"; }
+
+case "${1:-start}" in
+ stop)
+    echo "Stopping dashboard..."
+    PID=$(lsof -t -i "tcp:$PORT" -sTCP:LISTEN 2>/dev/null | xargs)  # listening sockets only; xargs joins several PIDs onto one line
+    if [ -n "$PID" ]; then
+      kill $PID 2>/dev/null  # unquoted on purpose: one kill argument per PID
+      ok "Dashboard stopped (PID $PID)"
+    else
+      warn "Dashboard not running on port $PORT"
+    fi
+    exit 0
+    ;;
+
+ status)
+ echo "=== Status ==="
+ # Ollama
+ if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
+ MODELS=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
+ ok "Ollama running ($MODELS models)"
+ else
+ fail "Ollama not running"
+ fi
+ # Dashboard
+ if curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
+ ok "Dashboard running at http://localhost:$PORT"
+ else
+ fail "Dashboard not running"
+ fi
+ exit 0
+ ;;
+
+ start)
+ echo "=== Starting Mission Control ==="
+ echo ""
+
+ # 1. Ensure Ollama is running
+ if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
+ ok "Ollama already running"
+ else
+ echo "Starting Ollama..."
+ ollama serve &>/dev/null &
+ sleep 2
+ if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
+ ok "Ollama started"
+ else
+ fail "Could not start Ollama. Try: ollama serve"
+ fi
+ fi
+
+ # 2. Check if dashboard already running
+ if curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
+ ok "Dashboard already running at http://localhost:$PORT"
+ exit 0
+ fi
+
+ # 3. Install deps if needed
+ if [ ! -d "$DASHBOARD_DIR/node_modules" ]; then
+ echo "Installing dependencies..."
+ (cd "$DASHBOARD_DIR" && npm install --silent)
+ ok "Dependencies installed"
+ fi
+
+ # 4. Start dashboard
+ echo "Starting dashboard on port $PORT..."
+ (cd "$DASHBOARD_DIR" && npm run dev &>/dev/null &)
+
+ # Wait for it to be ready
+ for i in $(seq 1 15); do
+ if curl -s --max-time 1 "http://localhost:$PORT" &>/dev/null; then
+ ok "Dashboard ready at http://localhost:$PORT"
+ echo ""
+ echo "Open: http://localhost:$PORT"
+ echo "Stop: bash start-dashboard.sh stop"
+ exit 0
+ fi
+ sleep 1
+ done
+
+ fail "Dashboard did not start within 15s. Check: cd dashboard && npm run dev"
+ exit 1
+ ;;
+
+ *)
+ echo "Usage: bash start-dashboard.sh [start|stop|status]"
+ exit 1
+ ;;
+esac
diff --git a/__LOCAL_LLMs/test_orpheus_tts.py b/__LOCAL_LLMs/test_orpheus_tts.py
new file mode 100644
index 00000000..17f05887
--- /dev/null
+++ b/__LOCAL_LLMs/test_orpheus_tts.py
@@ -0,0 +1,189 @@
+"""
+Test Orpheus TTS via Ollama + SNAC decoder.
+
+Prerequisites:
+ 1. bash setup-tts.sh (one-shot: installs everything)
+ -- OR manually --
+ 1. ollama pull sematre/orpheus:en
+ 2. bash download-tts-models.sh snac (downloads SNAC via hf-mirror.com)
+ 3. ollama serve (must be running)
+
+Usage:
+ .venv-qwen-tts/bin/python test_orpheus_tts.py
+"""
+import os
+import re
+import time
+import json
+import struct
+import wave
+import urllib.request
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+SNAC_MODEL_DIR = os.path.join(SCRIPT_DIR, "models", "snac_24khz")
+OLLAMA_URL = "http://localhost:11434"
+MODEL = "sematre/orpheus:en"
+
+AUDIO_TOKEN_RE = re.compile(r"<custom_token_(\d+)>")  # captures the numeric id; callers pass each match to int()
+
+
+def check_ollama():
+    """Return True if Ollama answers at OLLAMA_URL and lists MODEL; otherwise print a hint and return False."""
+    try:
+        req = urllib.request.Request(f"{OLLAMA_URL}/api/tags")
+        with urllib.request.urlopen(req, timeout=3) as resp:
+            data = json.loads(resp.read())
+        names = [m["name"] for m in data.get("models", [])]
+        if not any(MODEL in n for n in names):  # substring match against each installed model name
+            print(f"ERROR: Model '{MODEL}' not found. Run: ollama pull {MODEL}")
+            return False
+        return True
+    except Exception as e:  # any failure here (connection, timeout, bad JSON) is reported as "cannot connect"
+        print(f"ERROR: Cannot connect to Ollama at {OLLAMA_URL}: {e}")
+        print("Run: ollama serve")
+        return False
+
+
+def check_snac():
+    """Return True if the SNAC weights file exists under SNAC_MODEL_DIR; otherwise print a hint and return False."""
+    if not os.path.isfile(os.path.join(SNAC_MODEL_DIR, "pytorch_model.bin")):  # the directory alone may exist without weights
+        print(f"ERROR: SNAC decoder not found at {SNAC_MODEL_DIR}")
+        print("Run: bash setup-tts.sh (or: bash download-tts-models.sh snac)")
+        return False
+    return True
+
+
+def load_snac():
+    """Load the SNAC codec from SNAC_MODEL_DIR and return it in eval mode."""
+    # Imported here rather than at module top, so the pre-checks in main() run without importing snac.
+    import snac
+
+    print(f"Loading SNAC decoder from {SNAC_MODEL_DIR}...")
+    model = snac.SNAC.from_pretrained(SNAC_MODEL_DIR)
+    model.eval()
+    return model
+
+
+def generate_tokens(text: str, voice: str = "tara") -> str:
+    """POST a '<voice>: <text>' prompt to Ollama's /api/generate and return the raw response text."""
+    prompt = f"<|begin_of_text|>{voice}: {text}<|eot_id|>"
+
+    payload = json.dumps({
+        "model": MODEL,
+        "prompt": prompt,
+        "stream": False,  # one JSON document is read back instead of a stream of chunks
+        "options": {
+            "temperature": 0.6,
+            "top_p": 0.9,
+            "repeat_penalty": 1.1,
+            "num_predict": 10240,
+            "stop": ["<|end_of_text|>"],
+        },
+    }).encode()
+
+    req = urllib.request.Request(
+        f"{OLLAMA_URL}/api/generate",
+        data=payload,
+        headers={"Content-Type": "application/json"},
+    )
+
+    print("Generating audio tokens via Ollama...")
+    t0 = time.time()
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        result = json.loads(resp.read())
+
+    elapsed = time.time() - t0
+    response_text = result.get("response", "")
+    token_count = len(AUDIO_TOKEN_RE.findall(response_text))  # counted for the log line only
+    print(f"Generated {token_count} audio tokens in {elapsed:.1f}s")
+    return response_text
+
+
+def decode_tokens(response_text: str, snac_model) -> tuple:
+    """Decode audio tokens in response_text with snac_model; return (samples, 24000) or (None, 0) on failure."""
+    import torch
+
+    tokens = AUDIO_TOKEN_RE.findall(response_text)
+    if not tokens:
+        print("ERROR: No audio tokens found in response")
+        return None, 0
+
+    audio_ids = [
+        int(tok) - 10 - ((idx % 7) * 4096)  # remove the base offset of 10 plus 4096 per position within a 7-token frame
+        for idx, tok in enumerate(tokens)
+    ]
+
+    # Trim to multiple of 7
+    audio_ids = audio_ids[: len(audio_ids) // 7 * 7]
+    if len(audio_ids) == 0:
+        print("ERROR: Not enough audio tokens to decode")
+        return None, 0
+
+    audio_tensor = torch.tensor(audio_ids, dtype=torch.int32).reshape(-1, 7)  # one row per 7-token frame
+    codes_0 = audio_tensor[:, 0].unsqueeze(0)  # column 0: one code per frame
+    codes_1 = torch.stack((audio_tensor[:, 1], audio_tensor[:, 4])).t().flatten().unsqueeze(0)  # columns 1,4 interleaved
+    codes_2 = (
+        torch.stack((audio_tensor[:, 2], audio_tensor[:, 3], audio_tensor[:, 5], audio_tensor[:, 6]))  # columns 2,3,5,6 interleaved
+        .t()
+        .flatten()
+        .unsqueeze(0)
+    )
+
+    print("Decoding audio...")
+    with torch.inference_mode():
+        audio_hat = snac_model.decode([codes_0, codes_1, codes_2])
+
+    audio_np = audio_hat[0].squeeze().numpy()
+    return audio_np, 24000
+
+
+def save_wav(audio_np, sample_rate: int, path: str):
+    """Write audio_np to path as a mono 16-bit WAV at sample_rate and print the resulting duration."""
+    import numpy as np
+
+    # Scale by 32767, clip to the int16 range, then cast
+    audio_int16 = (audio_np * 32767).clip(-32768, 32767).astype(np.int16)
+
+    with wave.open(path, "w") as wf:
+        wf.setnchannels(1)
+        wf.setsampwidth(2)  # 2 bytes per sample = 16-bit
+        wf.setframerate(sample_rate)
+        wf.writeframes(audio_int16.tobytes())
+
+    duration = len(audio_int16) / sample_rate
+    print(f"Saved {path} ({duration:.1f}s, {sample_rate} Hz)")
+
+
+def main():
+    """Run the pre-checks, then synthesize each test sentence and save it as test_orpheus_<voice>.wav."""
+    print("=== Orpheus TTS Test (Ollama + SNAC) ===\n")
+    if not check_ollama():
+        raise SystemExit(1)  # non-zero exit so callers can detect the failed pre-check
+    if not check_snac():
+        raise SystemExit(1)
+
+    snac_model = load_snac()
+
+    # Voices: tara, leah, jess, leo, dan, mia, zac, zoe
+    tests = [
+        ("Hello! This is Orpheus text to speech, running entirely on your Mac through Ollama.", "tara"),
+        (" That's amazing! Local AI speech generation without any cloud services!", "leo"),  # NOTE(review): leading space suggests an emotion tag (e.g. <laugh>) was stripped — confirm
+    ]
+
+    for i, (text, voice) in enumerate(tests):
+        print(f"\n--- Test {i+1}: voice={voice} ---")
+        print(f"Text: {text[:80]}...")
+
+        response = generate_tokens(text, voice)
+        audio, sr = decode_tokens(response, snac_model)
+
+        if audio is not None:  # decode_tokens returns (None, 0) when nothing could be decoded
+            outpath = os.path.join(SCRIPT_DIR, f"test_orpheus_{voice}.wav")
+            save_wav(audio, sr, outpath)
+
+    print("\n=== Done! Open the .wav files to listen. ===")
+    print("Play with: afplay test_orpheus_tara.wav")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/__LOCAL_LLMs/test_qwen_tts.py b/__LOCAL_LLMs/test_qwen_tts.py
new file mode 100644
index 00000000..4db74545
--- /dev/null
+++ b/__LOCAL_LLMs/test_qwen_tts.py
@@ -0,0 +1,84 @@
+"""
+Test Qwen3-TTS 0.6B on Apple Silicon (MPS or CPU fallback).
+
+Prerequisites:
+ bash setup-tts.sh (one-shot: installs everything)
+ -- OR manually --
+ bash download-tts-models.sh (downloads models via hf-mirror.com)
+
+Usage:
+ .venv-qwen-tts/bin/python test_qwen_tts.py
+"""
+import os
+import time
+import torch
+import soundfile as sf
+from qwen_tts import Qwen3TTSModel
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+MODEL_PATH = os.path.join(SCRIPT_DIR, "models", "Qwen3-TTS-12Hz-0.6B-CustomVoice")
+
+# Check model exists locally
+if not os.path.isdir(MODEL_PATH):
+    print(f"ERROR: Model not found at {MODEL_PATH}")
+    print("Run: bash setup-tts.sh (or: bash download-tts-models.sh qwen)")
+    raise SystemExit(1)
+
+# Pick device: MPS if available, else CPU
+if torch.backends.mps.is_available():
+    device = "mps"
+    dtype = torch.float32 # MPS doesn't support bfloat16
+    print("Using MPS (Apple Metal GPU)")
+else:
+    device = "cpu"
+    dtype = torch.float32
+    print("Using CPU")
+
+print(f"Loading Qwen3-TTS-12Hz-0.6B-CustomVoice on {device}...")
+t0 = time.time()
+
+model = Qwen3TTSModel.from_pretrained(
+    MODEL_PATH,
+    device_map=device,
+    dtype=dtype,
+)
+
+print(f"Model loaded in {time.time() - t0:.1f}s")
+print(f"Supported speakers: {model.get_supported_speakers()}")
+print(f"Supported languages: {model.get_supported_languages()}")
+
+# Test 1: English with a built-in speaker
+text = "Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your Mac."
+print(f"\nGenerating speech for: {text[:60]}...")
+
+t1 = time.time()
+wavs, sr = model.generate_custom_voice(
+    text=text,
+    language="English",
+    speaker="Chelsie",  # NOTE(review): confirm this name appears in the "Supported speakers" list printed above
+)
+elapsed = time.time() - t1
+print(f"Generated in {elapsed:.1f}s, sample rate={sr}, audio length={len(wavs[0])/sr:.1f}s")
+
+output_path = os.path.join(SCRIPT_DIR, "test_output_english.wav")  # next to the script, not in the current directory
+sf.write(output_path, wavs[0], sr)
+print(f"Saved to {output_path}")
+
+# Test 2: English with emotion instruction
+text2 = "This is absolutely incredible! I can't believe how well this works on a local machine!"
+print(f"\nGenerating with emotion: {text2[:60]}...")
+
+t2 = time.time()
+wavs2, sr2 = model.generate_custom_voice(
+    text=text2,
+    language="English",
+    speaker="Chelsie",
+    instruct="Speak with excitement and enthusiasm",
+)
+elapsed2 = time.time() - t2
+print(f"Generated in {elapsed2:.1f}s, audio length={len(wavs2[0])/sr2:.1f}s")
+excited_path = os.path.join(SCRIPT_DIR, "test_output_excited.wav")
+sf.write(excited_path, wavs2[0], sr2)
+print(f"Saved to {excited_path}")
+
+print("\nDone! Open the .wav files to listen.")
diff --git a/__LOCAL_LLMs/windows_specific/razer-blade-18-spec.md b/__LOCAL_LLMs/windows_specific/razer-blade-18-spec.md
new file mode 100644
index 00000000..3919e6df
--- /dev/null
+++ b/__LOCAL_LLMs/windows_specific/razer-blade-18-spec.md
@@ -0,0 +1,387 @@
+Engineering-grade specification for the Razer Blade 18 configuration below. Figures labelled "estimated", "expected" or "typical" are not vendor-confirmed.
+
+⸻
+
+Razer Blade 18 (Model: RZ09-05299ER9-R3U1) — Detailed Specification Document
+
+Manufacturer: Razer Inc.
+Product Line: Blade Series
+Model Number: RZ09-05299ER9-R3U1
+Form Factor: High-performance desktop-class gaming & workstation laptop
+Release Generation: RTX 50-series era (2026)
+
+⸻
+
+1. System Overview
+
+The Razer Blade 18 is positioned as a flagship desktop-replacement laptop, integrating Intel Core Ultra HX processors, NVIDIA RTX 50-series GPUs, ultra-high refresh displays, and workstation-level memory/storage configurations. 
+
+Primary Target Use Cases
+• AAA gaming at maximum settings (4K, ray tracing)
+• AI / ML model development (local inference, CUDA workloads)
+• Software development & compilation
+• 3D rendering, Unreal Engine, Blender
+• Video editing (8K workflows)
+• Desktop replacement workstation
+
+⸻
+
+2. CPU (Processor)
+
+Processor: Intel® Core™ Ultra 9 275HX 
+
+Architecture
+
+Attribute Specification
+CPU family Intel Core Ultra HX Series
+Architecture Intel Arrow Lake-HX
+Core design Hybrid architecture
+Core types Performance cores + Efficient cores
+Target TDP 55W base (HX class), up to 160W maximum turbo power
+Fabrication TSMC N3B (compute tile)
+Integrated AI accelerator Intel NPU (Neural Processing Unit)
+
+Estimated core configuration (typical for Ultra 9 HX class)
+
+Core type Count
+Performance cores 8
+Efficient cores 16
+Total cores 24
+Threads 24
+
+AI acceleration
+
+Integrated:
+• Intel NPU
+• AVX2 and AVX-VNNI (AVX-512 is not supported on this CPU)
+• VNNI instructions
+• Hardware AI acceleration support
+
+Use cases:
+• Local AI inference
+• Background Copilot AI tasks
+• AI-assisted workflows
+
+⸻
+
+3. GPU (Graphics)
+
+Discrete GPU: NVIDIA GeForce RTX 5090 Laptop GPU 
+VRAM: 24 GB GDDR7 VRAM 
+
+⸻
+
+GPU Architecture
+
+Attribute Specification
+Architecture NVIDIA Blackwell (RTX 50-series)
+Memory type GDDR7
+VRAM size 24 GB
+CUDA cores 10,496
+Ray tracing cores 4th Gen RT cores
+Tensor cores 5th Gen
+PCIe interface PCIe Gen 5
+DirectX support DirectX 12 Ultimate
+Vulkan support Yes
+OpenCL support Yes
+CUDA support Yes
+
+⸻
+
+GPU Compute Capability
+
+Feature Support
+CUDA compute Yes
+Tensor acceleration Yes
+DLSS DLSS 4
+Ray tracing Hardware accelerated
+AI inference Excellent
+Stable diffusion Excellent
+Local LLM inference Excellent
+
+⸻
+
+AI / ML Capability Estimate
+
+Model Expected Performance
+Llama 3 8B Real-time
+Llama 3 70B quantized Usable
+Stable Diffusion XL Very fast
+Whisper large Very fast
+TensorRT inference Excellent
+
+⸻
+
+4. RAM (Memory)
+
+Installed memory: 64 GB RAM 
+Memory speed: 5600 MHz 
+
+⸻
+
+Memory Details
+
+Attribute Specification
+Capacity 64 GB
+Type DDR5
+Speed 5600 MHz
+Channels Dual channel
+ECC No
+Upgradeability Yes (depends on configuration)
+
+⸻
+
+Memory bandwidth estimate
+
+~90–120 GB/sec
+
+⸻
+
+5. Storage
+
+Installed storage: 4 TB SSD (2 TB + 2 TB) 
+
+⸻
+
+Storage configuration
+
+Attribute Specification
+Total capacity 4 TB
+Drive type NVMe SSD
+Interface PCIe Gen 4 or Gen 5
+Configuration Dual SSD
+RAID support Possible
+Upgradeable Yes
+
+⸻
+
+Storage performance estimate
+
+Metric Expected
+Sequential read 7,000–14,000 MB/sec
+Sequential write 6,000–12,000 MB/sec
+Random IOPS >1 million
+
+⸻
+
+6. Display
+
+Display size: 18 inches 
+Display modes: Dual mode UHD+ 240 Hz / FHD+ 440 Hz 
+
+⸻
+
+Display detailed specifications
+
+Attribute Specification
+Size 18 inches
+Mode 1 resolution UHD+ (3840×2400)
+Mode 2 resolution FHD+ (1920×1200)
+Refresh rate (UHD+) 240 Hz
+Refresh rate (FHD+) 440 Hz
+Aspect ratio 16:10
+Panel type IPS or Mini-LED
+Adaptive sync Yes
+Response time <3 ms (estimated)
+HDR support Likely HDR 600–1000
+Color gamut 100% DCI-P3
+
+⸻
+
+Dual-mode display explanation
+
+Switchable between:
+
+Mode Use case
+UHD+ 240 Hz Visual quality, editing
+FHD+ 440 Hz Competitive gaming
+
+⸻
+
+7. Operating System
+
+OS: Windows 11 Home 
+
+Supports:
+• DirectX 12 Ultimate
+• WSL2
+• CUDA
+• AI frameworks
+
+⸻
+
+8. Cooling System
+
+Advanced vapor chamber cooling system.
+
+Expected features:
+• Vapor chamber cooling
+• Dual fan cooling
+• Liquid metal thermal interface
+• Advanced heat pipe network
+
+Supports sustained:
+• CPU ~120W+
+• GPU ~175W+
+
+⸻
+
+9. Connectivity & Ports (Expected for Blade 18)
+
+Typical Blade 18 includes:
+
+USB
+• 3× USB-A 3.2 Gen 2
+• 2× USB-C (Thunderbolt 4 / USB4)
+
+Video
+• HDMI 2.1
+• Thunderbolt video output
+
+Network
+• 2.5 Gb Ethernet
+
+Audio
+• 3.5 mm combo jack
+
+Storage expansion
+• Dual NVMe slots
+
+⸻
+
+10. Wireless Connectivity
+
+Expected:
+
+Technology Support
+Wi-Fi Wi-Fi 7
+Bluetooth Bluetooth 5.4
+
+⸻
+
+11. Power System
+
+Estimated:
+
+Attribute Specification
+Power adapter 330W–400W
+Battery ~90–99 Wh
+Charging Fast charging supported
+
+⸻
+
+12. Keyboard
+ • Per-key RGB lighting
+ • Razer Chroma support
+ • Anti-ghosting
+ • Full keyboard with numpad
+
+⸻
+
+13. Build and Physical
+
+Expected typical Blade 18 chassis:
+
+Attribute Specification
+Material CNC aluminum
+Color Black
+Thickness ~21–24 mm
+Weight ~3.1 kg
+
+⸻
+
+14. AI / ML Workstation Capability Assessment
+
+This laptop is excellent for:
+
+Supported workloads
+
+Workload Capability
+CUDA development Excellent
+Stable diffusion Excellent
+LLM inference Excellent
+PyTorch training Very good
+TensorFlow Excellent
+Unreal Engine Excellent
+Blender Excellent
+
+⸻
+
+15. Gaming Performance Tier
+
+Expected performance:
+
+Resolution Performance
+1080p Extreme
+1440p Extreme
+4K Excellent
+
+Ray tracing:
+• Excellent
+
+DLSS 4:
+• Supported
+
+⸻
+
+16. Storage Expansion Capability
+
+Supports:
+• Dual NVMe SSD
+• RAID configuration
+• Upgradable storage
+
+⸻
+
+17. Price
+
+Configured model price:
+
+$5,199.99 USD 
+
+⸻
+
+18. Performance Tier Classification
+
+Category Rating
+Gaming Extreme
+AI / ML Extreme
+Software development Extreme
+Video editing Extreme
+Desktop replacement Extreme
+
+⸻
+
+19. Ideal Use Cases for You Specifically (Saravana)
+
+Based on your AI engineering work:
+
+Perfect for:
+• Running multiple local LLMs
+• CUDA inference pipelines
+• Running LangGraph agents locally
+• Running Stable Diffusion
+• Running Whisper locally
+• Running multiple Docker containers
+• Unreal Engine / 3D
+
+⸻
+
+20. Summary
+
+This configuration is:
+
+One of the most powerful laptops available in the world currently
+
+Equivalent to:
+• High-end workstation
+• High-end desktop GPU
+• Enterprise-grade development machine
+
+⸻
+
+Possible follow-up documents (not yet written):
+
+• Full performance comparison vs desktop RTX 4090
+• Or benchmark estimates for LLM / Stable Diffusion / coding workloads
+• Or recommended optimal configuration for your home lab and multi-model setup
diff --git a/__LOCAL_LLMs/windows_specific/setup-guide.md b/__LOCAL_LLMs/windows_specific/setup-guide.md
new file mode 100644
index 00000000..aaa03932
--- /dev/null
+++ b/__LOCAL_LLMs/windows_specific/setup-guide.md
@@ -0,0 +1,372 @@
+# Windows Setup Guide — Local LLM Stack on Razer Blade 18
+
+> **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
+> **OS:** Windows 11 Home
+> **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
+> **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs
+
+---
+
+## Prerequisites
+
+### 1. Windows Package Manager
+
+**winget** ships with Windows 11, so you only need to verify it is available. Optionally install **Scoop** for CLI tools:
+
+```powershell
+# Verify winget
+winget --version
+
+# Install Scoop (optional, useful for dev tools)
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
+```
+
+### 2. NVIDIA CUDA Toolkit
+
+The RTX 5090 needs the latest CUDA drivers for GPU-accelerated inference.
+
+```powershell
+# Install NVIDIA drivers (latest Game Ready or Studio)
+winget install --id Nvidia.GeForceExperience
+
+# Install CUDA Toolkit (required for PyTorch CUDA)
+winget install --id Nvidia.CUDA
+# Or download from: https://developer.nvidia.com/cuda-downloads
+
+# Verify
+nvidia-smi
+```
+
+Expected output should show:
+
+- **RTX 5090** with **24 GB** VRAM
+- CUDA version 13.x+
+
+### 3. Node.js (for Mission Control Dashboard)
+
+```powershell
+winget install --id OpenJS.NodeJS.LTS
+# Verify
+node --version # should be 20.x+
+npm --version
+```
+
+### 4. Python 3.12
+
+```powershell
+winget install --id Python.Python.3.12
+# Verify
+python --version
+pip --version
+```
+
+### 5. Git
+
+```powershell
+winget install --id Git.Git
+```
+
+### 6. ffmpeg
+
+```powershell
+winget install --id Gyan.FFmpeg
+# Or: scoop install ffmpeg
+```
+
+---
+
+## 1. Ollama — LLM Server
+
+### Install
+
+```powershell
+winget install --id Ollama.Ollama
+```
+
+Ollama for Windows runs as a background service and automatically uses CUDA (RTX 5090).
+
+### Verify
+
+```powershell
+ollama --version
+curl http://localhost:11434/api/tags
+```
+
+### Download Models
+
+```powershell
+# Coding
+ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
+ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
+
+# Reasoning
+ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
+
+# General
+ollama pull llama3.1:8b # 4.9 GB — fast general tasks
+
+# TTS
+ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
+
+# Verify
+ollama list
+```
+
+> **Note:** With 24 GB VRAM, Ollama will offload 32B models almost entirely to GPU.
+> On macOS (48 GB unified), the 32B models run in shared CPU/GPU memory.
+> On this machine, **GPU inference will be significantly faster** for models that fit in 24 GB VRAM.
+
+### VRAM Budget (RTX 5090 — 24 GB)
+
+| Model | VRAM Usage | Fits in GPU? |
+| ---------------------------- | ---------- | ------------ |
+| llama3.1:8b | ~5 GB | ✅ Fully |
+| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
+| sematre/orpheus:en | ~4 GB | ✅ Fully |
+| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
+| deepseek-r1:32b | ~19 GB | ✅ Fully |
+| Two 7B models simultaneously | ~10 GB | ✅ Both fit |
+
+---
+
+## 2. Whisper.cpp — Speech-to-Text
+
+### Option A: Pre-built Binary (Recommended)
+
+Download the latest release from GitHub:
+
+```powershell
+# Create whisper directory
+mkdir "$env:USERPROFILE\whisper-cpp"
+cd "$env:USERPROFILE\whisper-cpp"
+
+# Download latest release (CUDA build)
+# Check: https://github.com/ggerganov/whisper.cpp/releases
+# Look for: whisper-cublas-bin-x64.zip or whisper-cuda-bin-x64.zip
+```
+
+### Option B: Build from Source (CUDA)
+
+```powershell
+git clone https://github.com/ggerganov/whisper.cpp.git
+cd whisper.cpp
+cmake -B build -DGGML_CUDA=ON
+cmake --build build --config Release
+```
+
+### Download Whisper Model
+
+```powershell
+mkdir "$env:USERPROFILE\whisper-models"
+
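+# Download ggml-large-v3-turbo (1.5 GB)
+# Use curl.exe explicitly: in Windows PowerShell 5.1, "curl" is an alias for Invoke-WebRequest and rejects -L / -o
+curl.exe -L -o "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" `
+ "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"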
+```
+
+> **No corporate proxy on this machine** — download directly from `huggingface.co`.
+> The `hf-mirror.com` workaround is only needed on the corporate MacBook.
+
+### Verify
+
+```powershell
+# Test transcription
+whisper-cli -m "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" -f test.wav
+```
+
+---
+
+## 3. TTS — Orpheus + Qwen3-TTS
+
+### 3a. Orpheus TTS (via Ollama)
+
+Already handled in Step 1 (`ollama pull sematre/orpheus:en`).
+
+### 3b. SNAC Decoder
+
+```powershell
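+# Create models directory (match macOS layout)
+# Run this from the __LOCAL_LLMs directory of your clone. ($PSScriptRoot is only populated inside a
+# running script; in an interactive session it is empty and would resolve to \models at the drive root.)
+$MODELS = ".\models"
+mkdir "$MODELS\snac_24khz" -Force
+
+# Download SNAC decoder
+# Use curl.exe explicitly: in Windows PowerShell 5.1, "curl" is an alias for Invoke-WebRequest and rejects -L / -o
+curl.exe -L -o "$MODELS\snac_24khz\config.json" `
+ "https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/config.json"
+curl.exe -L -o "$MODELS\snac_24khz\pytorch_model.bin" `
+ "https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"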
+```
+
+### 3c. Python Venv + Dependencies
+
+```powershell
+cd __LOCAL_LLMs
+
+# Create venv
+python -m venv .venv-qwen-tts
+
+# Activate (Windows uses Scripts, not bin)
+.\.venv-qwen-tts\Scripts\Activate.ps1
+
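+# Install PyTorch with CUDA (NOT MPS — that's Apple only)
+# RTX 50-series (Blackwell, sm_120) needs the CUDA 12.8+ wheels; the cu124 builds do not include Blackwell kernels
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128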
+
+# Install other deps
+pip install snac numpy soundfile
+
+# Verify CUDA
+python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, Device: {torch.cuda.get_device_name(0)}')"
+# Expected: CUDA: True, Device: NVIDIA GeForce RTX 5090 Laptop GPU
+```
+
+### 3d. Qwen3-TTS 0.6B
+
+```powershell
+$MODELS = ".\models"
+
+# Use curl.exe explicitly: in Windows PowerShell 5.1, "curl" is an alias for Invoke-WebRequest and rejects -L / -o
+
+# Tokenizer (~650 MB)
+mkdir "$MODELS\Qwen3-TTS-Tokenizer-12Hz" -Force
+foreach ($f in @("config.json", "configuration.json", "preprocessor_config.json")) {
+ curl.exe -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\$f" `
+ "https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f"
+}
+curl.exe -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\model.safetensors" `
+ "https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
+
+# Model weights (~1.8 GB)
+mkdir "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice" -Force
+foreach ($f in @("config.json", "generation_config.json")) {
+ curl.exe -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\$f" `
+ "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f"
+}
+curl.exe -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\model.safetensors" `
+ "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
+```
+
+### 3e. Test TTS
+
+```powershell
+# Activate venv
+.\.venv-qwen-tts\Scripts\Activate.ps1
+
+# Orpheus TTS test
+python test_orpheus_tts.py
+
+# Qwen3-TTS test
+python test_qwen_tts.py
+```
+
+> **Key difference from macOS:** Qwen3-TTS should run on **CUDA** instead of MPS.
+> `torch.backends.mps.is_available()` returns `False` on Windows, so the `torch.device("mps")` branch
+> in `test_qwen_tts.py` is never taken. Check that the fallback selects CUDA rather than CPU;
+> if it does not, update the device logic to prefer CUDA:
+>
+> ```python
+> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+> ```
+
+---
+
+## 4. Mission Control Dashboard
+
+```powershell
+cd __LOCAL_LLMs\dashboard
+
+# Install dependencies
+npm install
+
+# Start dev server
+npm run dev
+# Open http://localhost:3000
+```
+
+The dashboard is pure Next.js — works identically on Windows. The API routes auto-detect:
+
+- **Ollama** at `localhost:11434`
+- **Whisper** models in `%USERPROFILE%\whisper-models\`
+- **TTS** engines (Orpheus, Qwen3-TTS) and Python venv
+
+### Start Script (PowerShell)
+
+Until `start-dashboard.ps1` exists (see the TODO below), run the equivalent of `start-dashboard.sh` manually:
+
+```powershell
+# Quick start (manual)
+ollama serve # only if Ollama is not already running as a service; it blocks, so run it in a separate terminal
+cd __LOCAL_LLMs\dashboard
+npm run dev
+```
+
+> TODO: Create `start-dashboard.ps1` as a PowerShell equivalent of `start-dashboard.sh`
+
+---
+
+## 5. Key Differences: macOS vs Windows
+
+| Area | macOS (M4 Pro 48 GB) | Windows (Razer Blade 18) |
+| ------------------- | ----------------------------------- | ------------------------------------- |
+| **GPU** | Apple Silicon (unified memory, MPS) | RTX 5090 (24 GB VRAM, CUDA) |
+| **Ollama GPU** | Automatic (Metal) | Automatic (CUDA) |
+| **VRAM** | Shared from 48 GB RAM | Dedicated 24 GB GDDR7 |
+| **PyTorch device** | `mps` | `cuda` |
+| **Whisper install** | `brew install whisper-cpp` | Build from source or download release |
+| **Python venv** | `bin/activate` | `Scripts\Activate.ps1` |
+| **Package manager** | Homebrew | winget / scoop |
+| **Shell** | zsh / bash | PowerShell / cmd |
+| **Scripts** | `.sh` (bash) | `.ps1` (PowerShell) |
+| **Model download** | `hf-mirror.com` (corporate proxy) | `huggingface.co` (no proxy) |
+| **Dashboard** | Identical | Identical |
+| **Ollama models** | Identical | Identical |
+
+### Performance Expectations
+
+| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
+| --------------------------- | ---------------------------- | ------------------------- |
+| qwen2.5-coder:32b inference | ~15–25 tok/s (MPS/CPU blend) | ~40–60 tok/s (full CUDA) |
+| Whisper large-v3-turbo | ~2–4x realtime (CPU) | ~8–15x realtime (CUDA) |
+| Orpheus TTS | ~realtime (CPU decode) | ~2–3x realtime (CUDA) |
+| Qwen3-TTS | ~realtime (MPS) | ~2–4x realtime (CUDA) |
+| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to RAM |
+
+---
+
+## 6. File Layout (Same as macOS)
+
+```
+__LOCAL_LLMs/
+├── dashboard/ ← Mission Control (port 3000) — works as-is
+├── models/ ← TTS model weights (gitignored)
+│ ├── snac_24khz/
+│ ├── Qwen3-TTS-Tokenizer-12Hz/
+│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/
+├── .venv-qwen-tts/ ← Python venv (Scripts\ on Windows)
+├── test_orpheus_tts.py ← works as-is (device fallback)
+├── test_qwen_tts.py ← update device to prefer CUDA
+├── windows_specific/
+│ ├── razer-blade-18-spec.md ← hardware spec
+│ └── setup-guide.md ← this file
+└── docs/ ← macOS-focused docs (still useful as reference)
+```
+
+---
+
+## 7. Quick Reference — Full Setup Checklist
+
+```
+[ ] Install NVIDIA drivers + CUDA Toolkit
+[ ] Install Ollama (winget install Ollama.Ollama)
+[ ] Pull models: qwen2.5-coder:32b, deepseek-r1:32b, llama3.1:8b, orpheus
+[ ] Install Node.js 20+ (winget)
+[ ] Install Python 3.12 (winget)
+[ ] Install Git (winget)
+[ ] Install ffmpeg (winget)
+[ ] Clone repo
+[ ] Download Whisper model to %USERPROFILE%\whisper-models\
+[ ] Build or download whisper-cpp with CUDA
+[ ] Create Python venv + install PyTorch CUDA + snac
+[ ] Download SNAC decoder
+[ ] Download Qwen3-TTS tokenizer + model
+[ ] npm install in dashboard/
+[ ] Run dashboard: npm run dev
+[ ] Verify: http://localhost:3000 shows all green
+```