fix(local-llms): cross-platform audit — 8 bugs/gaps fixed

- setup-tts.sh: make fully cross-platform (macOS + Linux/WSL2)
  - OS detection, apt fallback, CUDA PyTorch install, nvidia-smi check
  - cross-platform playback hints, HF_MIRROR env override
- api/system/route.ts: fix ffmpeg detection (use -version not --version)
- api/system/memory/route.ts: remove unused total variable in Linux path
- api/system/exec/route.ts: expand allowlist with Linux commands (head, tail, grep, which, ps, uname, free, lscpu, nvidia-smi, etc.)
- api/tts/route.ts: cross-platform venv path + CUDA/MPS label
- api/whisper/route.ts: Linux binary/model paths
- api/ollama/logs/route.ts: Linux log paths + WSL2 hint
- test_qwen_tts.py: platform-aware speech text + CUDA device detection
- test_orpheus_tts.py: platform-aware text, move import sys to top
- setup-guide.md: fix false auto-detect claim, add HF_MIRROR hint
2026-02-21 15:27:49 -08:00 · 2026-02-21 15:27:49 -08:00 · b1d2e4ec81
commit b1d2e4ec81
parent f85b455eb5
10 changed files with 518 additions and 440 deletions
--- a/__LOCAL_LLMs/dashboard/src/app/api/ollama/logs/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/ollama/logs/route.ts
@ -4,11 +4,15 @@ import { homedir } from 'os';
 import { join } from 'path';
 import { existsSync } from 'fs';

+const IS_MAC = process.platform === 'darwin';
+
 export async function GET() {
  const logPaths = [
    join(homedir(), '.ollama', 'logs', 'server.log'),
    join(homedir(), '.ollama', 'logs', 'gpu.log'),
    '/tmp/ollama.log',
+    // Linux / WSL2 — journalctl may write here
+    '/var/log/ollama.log',
  ];

  for (const logPath of logPaths) {
@ -25,11 +29,13 @@ export async function GET() {
    }
  }

-  // On macOS, Ollama logs via unified logging
+  // Fallback: platform-specific logging hint
+  const hint = IS_MAC
+    ? 'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m'
+    : 'Ollama logs not found. If running on Windows (accessed via WSL2), check Windows Event Viewer or: journalctl -u ollama --no-pager -n 50';
+
  return NextResponse.json({
-    lines: [
-      'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m',
-    ],
+    lines: [hint],
    path: 'system',
    total: 1,
  });
--- a/__LOCAL_LLMs/dashboard/src/app/api/system/exec/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/exec/route.ts
@ -5,9 +5,9 @@ import { promisify } from 'util';
 const execFileAsync = promisify(execFile);

 const COMMAND_ALLOWLIST = new Set([
+  // Cross-platform (macOS + Linux)
  'git',
  'npm',
-  'brew',
  'cat',
  'ls',
  'wc',
@ -15,6 +15,21 @@ const COMMAND_ALLOWLIST = new Set([
  'df',
  'echo',
  'date',
+  'head',
+  'tail',
+  'grep',
+  'which',
+  'ps',
+  'uname',
+  'whoami',
+  // macOS
+  'brew',
+  // Linux / WSL2
+  'free',
+  'lscpu',
+  'nvidia-smi',
+  'dpkg',
+  'apt',
 ]);

 export async function POST(request: NextRequest) {
--- a/__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
@ -1,10 +1,13 @@
 import { NextResponse } from 'next/server';
 import { exec } from 'child_process';
 import { promisify } from 'util';
+import { readFile } from 'fs/promises';
 import os from 'os';

 const execAsync = promisify(exec);

+const IS_MAC = process.platform === 'darwin';
+
 interface ProcessInfo {
  pid: number;
  name: string;
@ -26,7 +29,7 @@ interface VmStatBreakdown {

 async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
  try {
-    // ps with RSS in KB, sorted descending by RSS
+    // ps with RSS in KB, sorted descending by RSS — works on both macOS and Linux
    const { stdout } = await execAsync(
      `ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -${limit}`,
      { timeout: 3000 }
@ -60,36 +63,67 @@ async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
 }

 async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
-  try {
-    const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
-    const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
-    const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
-    const parse = (label: string): number => {
-      const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
-      return match ? parseInt(match[1]) * pageSize : 0;
-    };
-    return {
-      active: parse('Pages active'),
-      wired: parse('Pages wired down'),
-      compressor: parse('Pages occupied by compressor'),
-      inactive: parse('Pages inactive'),
-      purgeable: parse('Pages purgeable'),
-      speculative: parse('Pages speculative'),
-      free: parse('Pages free'),
-      pageSize,
-    };
-  } catch {
-    return {
-      active: 0,
-      wired: 0,
-      compressor: 0,
-      inactive: 0,
-      purgeable: 0,
-      speculative: 0,
-      free: 0,
-      pageSize: 16384,
-    };
+  if (IS_MAC) {
+    try {
+      const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
+      const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
+      const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
+      const parse = (label: string): number => {
+        const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
+        return match ? parseInt(match[1]) * pageSize : 0;
+      };
+      return {
+        active: parse('Pages active'),
+        wired: parse('Pages wired down'),
+        compressor: parse('Pages occupied by compressor'),
+        inactive: parse('Pages inactive'),
+        purgeable: parse('Pages purgeable'),
+        speculative: parse('Pages speculative'),
+        free: parse('Pages free'),
+        pageSize,
+      };
+    } catch {
+      // fall through to zeros
+    }
+  } else {
+    // Linux / WSL2 — parse /proc/meminfo into vm_stat-compatible structure
+    try {
+      const raw = await readFile('/proc/meminfo', 'utf-8');
+      const parse = (key: string): number => {
+        const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
+        return match ? parseInt(match[1]) * 1024 : 0;
+      };
+      const free = parse('MemFree');
+      const buffers = parse('Buffers');
+      const cached = parse('Cached');
+      const sReclaimable = parse('SReclaimable');
+      const active = parse('Active');
+
+      return {
+        active,
+        wired: buffers, // closest analogy
+        compressor: parse('SwapCached'),
+        inactive: parse('Inactive'),
+        purgeable: sReclaimable,
+        speculative: 0,
+        free,
+        pageSize: 4096,
+      };
+    } catch {
+      // fall through to zeros
+    }
  }
+
+  return {
+    active: 0,
+    wired: 0,
+    compressor: 0,
+    inactive: 0,
+    purgeable: 0,
+    speculative: 0,
+    free: 0,
+    pageSize: IS_MAC ? 16384 : 4096,
+  };
 }

 export async function GET() {
--- a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
@ -1,11 +1,14 @@
 import { NextResponse } from 'next/server';
 import { exec, execFile } from 'child_process';
 import { promisify } from 'util';
+import { readFile } from 'fs/promises';
 import os from 'os';

 const execAsync = promisify(exec);
 const execFileAsync = promisify(execFile);

+const IS_MAC = process.platform === 'darwin';
+
 // Cache slow commands with TTL
 let staticCache: {
  chip: string;
@ -20,10 +23,17 @@ const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds

 async function getChipInfo(): Promise<string> {
  try {
+    if (IS_MAC) {
+      const { stdout } = await execAsync(
+        "sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
+      );
+      return stdout.trim();
+    }
+    // Linux / WSL2
    const { stdout } = await execAsync(
-      "sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
+      "lscpu 2>/dev/null | grep 'Model name' | sed 's/.*:\\s*//' || cat /proc/cpuinfo | grep 'model name' | head -1 | sed 's/.*: //'"
    );
-    return stdout.trim();
+    return stdout.trim() || 'Unknown';
  } catch {
    return 'Unknown';
  }
@ -55,30 +65,63 @@ async function getOllamaModelsDiskUsage(): Promise<number> {

+/**
+ * Returns a display name for the GPU.
+ *
+ * macOS: the "Chipset Model" reported by `system_profiler`, defaulting to
+ * 'Apple Silicon (integrated)' when nothing is reported or the call fails.
+ * Linux / WSL2: the first GPU listed by `nvidia-smi`, else the first VGA/3D
+ * controller listed by `lspci`, else 'Unknown'.
+ */
 async function getGpuInfo(): Promise<string> {
  try {
+    if (IS_MAC) {
+      const { stdout } = await execAsync(
+        "system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
+        { timeout: 5000 }
+      );
+      return stdout.trim() || 'Apple Silicon (integrated)';
+    }
+    // Linux / WSL2 — try nvidia-smi first, fall back to lspci
+    try {
+      const { stdout } = await execAsync(
+        'nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1',
+        { timeout: 3000 }
+      );
+      if (stdout.trim()) return stdout.trim();
+    } catch {
+      /* no nvidia-smi */
+    }
    const { stdout } = await execAsync(
-      "system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
-      { timeout: 5000 }
+      "lspci 2>/dev/null | grep -i 'vga\\|3d' | sed 's/.*: //' | head -1"
    );
-    return stdout.trim() || 'Apple Silicon (integrated)';
+    return stdout.trim() || 'Unknown';
  } catch {
-    return 'Apple Silicon (integrated)';
+    return IS_MAC ? 'Apple Silicon (integrated)' : 'Unknown';
  }
 }

 async function getBrewPackages(): Promise<Array<{ name: string; version: string }>> {
  const targets = ['ollama', 'whisper-cpp', 'ffmpeg'];
  const results: Array<{ name: string; version: string }> = [];
-  for (const pkg of targets) {
-    try {
-      const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
-        timeout: 3000,
-      });
-      const parts = stdout.trim().split(' ');
-      if (parts.length >= 2) {
-        results.push({ name: parts[0], version: parts.slice(1).join(' ') });
+
+  if (IS_MAC) {
+    for (const pkg of targets) {
+      try {
+        const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
+          timeout: 3000,
+        });
+        const parts = stdout.trim().split(' ');
+        if (parts.length >= 2) {
+          results.push({ name: parts[0], version: parts.slice(1).join(' ') });
+        }
+      } catch {
+        // not installed
+      }
+    }
+  } else {
+    // Linux / WSL2 — check via version commands (ffmpeg uses -version, others use --version)
+    for (const pkg of targets) {
+      const bin = pkg === 'whisper-cpp' ? 'whisper-cli' : pkg;
+      const flag = bin === 'ffmpeg' ? '-version' : '--version';
+      try {
+        const { stdout } = await execAsync(`${bin} ${flag} 2>&1 | head -1`, { timeout: 3000 });
+        if (stdout.trim()) {
+          results.push({ name: pkg, version: stdout.trim() });
+        }
+      } catch {
+        // not installed
      }
-    } catch {
-      // not installed
    }
  }
  return results;
@ -106,7 +149,8 @@ async function getCachedOllamaDiskUsage(): Promise<number> {
  return value;
 }

-// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
+// macOS: vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
+// Linux: /proc/meminfo gives accurate breakdown
 async function getAccurateMemory(): Promise<{
  total: number;
  appMemory: number;
@ -115,42 +159,66 @@ async function getAccurateMemory(): Promise<{
  pressure: string;
 }> {
  const totalMem = os.totalmem();
-  try {
-    const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
-    const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
-    const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
-    const parse = (label: string): number => {
-      const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
-      return match ? parseInt(match[1]) * pageSize : 0;
-    };
-    const active = parse('Pages active');
-    const wired = parse('Pages wired down');
-    const inactive = parse('Pages inactive');
-    const purgeable = parse('Pages purgeable');
-    const speculative = parse('Pages speculative');
-    const free = parse('Pages free');
-    const compressor = parse('Pages occupied by compressor');

-    const appMemory = active + wired + compressor;
-    const cached = inactive + purgeable + speculative;
-    // Return raw free separately from cached — no overlap
-    // available for loading = free + cached (macOS reclaims cached on demand)
+  if (IS_MAC) {
+    try {
+      const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
+      const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
+      const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
+      const parse = (label: string): number => {
+        const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
+        return match ? parseInt(match[1]) * pageSize : 0;
+      };
+      const active = parse('Pages active');
+      const wired = parse('Pages wired down');
+      const inactive = parse('Pages inactive');
+      const purgeable = parse('Pages purgeable');
+      const speculative = parse('Pages speculative');
+      const free = parse('Pages free');
+      const compressor = parse('Pages occupied by compressor');

-    const ratio = appMemory / totalMem;
-    const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
+      const appMemory = active + wired + compressor;
+      const cached = inactive + purgeable + speculative;

-    return { total: totalMem, appMemory, cached, free, pressure };
-  } catch {
-    // Fallback to Node.js (inaccurate on macOS but works everywhere)
-    const freeMem = os.freemem();
-    return {
-      total: totalMem,
-      appMemory: totalMem - freeMem,
-      cached: 0,
-      free: freeMem,
-      pressure: 'unknown',
-    };
+      const ratio = appMemory / totalMem;
+      const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
+
+      return { total: totalMem, appMemory, cached, free, pressure };
+    } catch {
+      // fall through to generic fallback
+    }
+  } else {
+    // Linux / WSL2 — parse /proc/meminfo
+    try {
+      const raw = await readFile('/proc/meminfo', 'utf-8');
+      const parse = (key: string): number => {
+        const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
+        return match ? parseInt(match[1]) * 1024 : 0; // /proc/meminfo is in kB
+      };
+      const total = parse('MemTotal');
+      const free = parse('MemFree');
+      const buffers = parse('Buffers');
+      const cached = parse('Cached') + parse('SReclaimable') + buffers;
+      const appMemory = total - free - cached;
+
+      const ratio = appMemory / total;
+      const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
+
+      return { total, appMemory, cached, free, pressure };
+    } catch {
+      // fall through to generic fallback
+    }
  }
+
+  // Generic fallback (works everywhere but less accurate)
+  const freeMem = os.freemem();
+  return {
+    total: totalMem,
+    appMemory: totalMem - freeMem,
+    cached: 0,
+    free: freeMem,
+    pressure: 'unknown',
+  };
 }

 export async function GET() {
--- a/__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
@ -6,9 +6,14 @@ import { join, resolve } from 'path';

 const execAsync = promisify(exec);

+const IS_MAC = process.platform === 'darwin';
+
 // process.cwd() = dashboard/, parent = __LOCAL_LLMs/
 const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');

+// macOS/Linux: bin/python, Windows native: Scripts/python.exe
+const VENV_PYTHON = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
+
 interface TtsEngine {
  name: string;
  type: 'ollama' | 'python';
@ -67,8 +72,7 @@ async function checkOrpheus(): Promise<TtsEngine> {
  const snacSize = hasSnac ? await getFileSize(snacPath) : 0;

  // Check Python venv
-  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
-  const hasVenv = await fileExists(venvPython);
+  const hasVenv = await fileExists(VENV_PYTHON);

  if (hasModel && hasSnac && hasVenv) {
    engine.status = 'ready';
@ -114,13 +118,14 @@ async function checkQwenTts(): Promise<TtsEngine> {
  }

  const hasTokenizer = await fileExists(join(tokenizerDir, 'config.json'));
-  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
-  const hasVenv = await fileExists(venvPython);
+  const hasVenv = await fileExists(VENV_PYTHON);

  if (hasModel && hasTokenizer && hasVenv) {
    engine.status = 'ready';
    engine.size = `${(modelSize / 1e9).toFixed(1)} GB`;
-    engine.details = '0.6B params · 10 languages · MPS/CPU';
+    engine.details = IS_MAC
+      ? '0.6B params · 10 languages · MPS/CPU'
+      : '0.6B params · 10 languages · CUDA/CPU';
  } else if (hasModel || hasTokenizer) {
    engine.status = 'partial';
    const missing: string[] = [];
@ -141,22 +146,21 @@ async function checkVenv(): Promise<{
  python?: string;
  packages?: string[];
 }> {
-  const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
-  const exists = await fileExists(venvPython);
+  const exists = await fileExists(VENV_PYTHON);
  if (!exists) return { exists: false };

  try {
    const { stdout } = await execAsync(
-      `"${venvPython}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
+      `"${VENV_PYTHON}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
      { timeout: 5000 }
    );
    return {
      exists: true,
-      python: venvPython,
+      python: VENV_PYTHON,
      packages: stdout.trim().split(' '),
    };
  } catch {
-    return { exists: true, python: venvPython };
+    return { exists: true, python: VENV_PYTHON };
  }
 }

--- a/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts
@ -7,9 +7,22 @@ import { homedir } from 'os';

 const execAsync = promisify(exec);

+const IS_MAC = process.platform === 'darwin';
+
 async function getWhisperBinaries(): Promise<string[]> {
  try {
-    const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
+    if (IS_MAC) {
+      const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
+      return stdout
+        .trim()
+        .split('\n')
+        .filter(Boolean)
+        .map(p => p.split('/').pop() || p);
+    }
+    // Linux / WSL2 — check common locations
+    const { stdout } = await execAsync(
+      'ls /usr/local/bin/whisper-* /usr/bin/whisper-* 2>/dev/null || which whisper-cli 2>/dev/null'
+    );
    return stdout
      .trim()
      .split('\n')
@ -25,7 +38,9 @@ const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '')
  .filter(Boolean)
  .concat([
    join(homedir(), 'whisper-models'),
-    '/opt/homebrew/share/whisper-cpp/models',
+    ...(IS_MAC
+      ? ['/opt/homebrew/share/whisper-cpp/models']
+      : ['/usr/local/share/whisper-cpp/models', '/usr/share/whisper-cpp/models']),
    join(homedir(), '.cache', 'whisper'),
  ]);

--- a/__LOCAL_LLMs/setup-tts.sh
+++ b/__LOCAL_LLMs/setup-tts.sh
@ -3,26 +3,24 @@
 # TTS Setup — One-Shot Script for Fresh Laptop
 #
 # Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
-# on Apple Silicon Macs. Works through corporate proxy.
+# on macOS (Apple Silicon) or Linux (CUDA GPU / WSL2).
 #
 # What this does:
-#   1. Installs Python 3.12 via Homebrew (if missing)
-#   2. Creates Python venv with TTS packages
+#   1. Installs Python 3.12 (Homebrew on macOS, apt on Linux)
+#   2. Creates Python venv with TTS packages (MPS on macOS, CUDA on Linux)
 #   3. Pulls Orpheus TTS model via Ollama
-#   4. Downloads SNAC audio decoder via hf-mirror.com
-#   5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
+#   4. Downloads SNAC audio decoder
+#   5. (Optional) Downloads Qwen3-TTS 0.6B
 #
 # Prerequisites:
-#   - macOS with Apple Silicon (M1/M2/M3/M4)
-#   - Homebrew installed
-#   - Ollama installed (brew install ollama)
+#   macOS: Homebrew + Ollama installed
+#   Linux: apt + Ollama accessible at localhost:11434
 #
 # Usage:
 #   bash setup-tts.sh
 #
 # After setup, test with:
 #   .venv-qwen-tts/bin/python test_orpheus_tts.py
-#   afplay test_orpheus_tara.wav
 # ============================================================
 set -e

@ -31,7 +29,13 @@ VENV="$SCRIPT_DIR/.venv-qwen-tts"
 MODELS_DIR="$SCRIPT_DIR/models"

 # HuggingFace mirror that works through corporate proxy
-HF_MIRROR="https://hf-mirror.com"
+# On personal machines, set HF_MIRROR=https://huggingface.co to download directly
+HF_MIRROR="${HF_MIRROR:-https://hf-mirror.com}"
+
+# Detect OS
+OS_TYPE="$(uname -s)"
+IS_MAC=false
+[ "$OS_TYPE" = "Darwin" ] && IS_MAC=true

 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -52,34 +56,58 @@ echo ""
 # ── 0. Check prerequisites ──────────────────────────────────
 step "Checking prerequisites"

-# Homebrew
-if ! command -v brew &>/dev/null; then
-    fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
-fi
-ok "Homebrew"
+if $IS_MAC; then
+    # Homebrew
+    if ! command -v brew &>/dev/null; then
+        fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
+    fi
+    ok "Homebrew"

-# Ollama
-if ! command -v ollama &>/dev/null; then
-    warn "Ollama not found. Installing..."
-    brew install ollama
+    # Ollama (install via Homebrew if missing)
+    if ! command -v ollama &>/dev/null; then
+        warn "Ollama not found. Installing..."
+        brew install ollama
+    fi
+else
+    # Linux / WSL2 — Ollama should be installed on host or via install script
+    if ! command -v ollama &>/dev/null; then
+        # On WSL2 Ollama runs on the Windows side; check if reachable
+        if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
+            fail "Ollama not found and not reachable at localhost:11434. Install Ollama on Windows or run: curl -fsSL https://ollama.com/install.sh | sh"
+        fi
+        ok "Ollama reachable at localhost:11434 (Windows host)"
+    fi
 fi
 ok "Ollama installed"

 # Check if Ollama is running
 if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
    warn "Ollama not running. Starting..."
-    ollama serve &>/dev/null &
-    sleep 3
+    if command -v ollama &>/dev/null; then
+        ollama serve &>/dev/null &
+        sleep 3
+    fi
    if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
        fail "Could not start Ollama. Try manually: ollama serve"
    fi
 fi
 ok "Ollama running on port 11434"

-# Apple Silicon check
+# GPU check
 ARCH=$(uname -m)
-if [ "$ARCH" != "arm64" ]; then
-    warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
+if $IS_MAC; then
+    if [ "$ARCH" != "arm64" ]; then
+        warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
+    else
+        ok "Apple Silicon ($ARCH) — MPS acceleration available"
+    fi
+else
+    if command -v nvidia-smi &>/dev/null; then
+        GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
+        ok "NVIDIA GPU detected: $GPU_NAME — CUDA acceleration available"
+    else
+        warn "nvidia-smi not found. CUDA acceleration won't be available (CPU fallback)."
+    fi
 fi

 # ── 1. Install Python 3.12 ──────────────────────────────────
@ -87,7 +115,7 @@ step "Python 3.12"

 PYTHON_CMD=""
 # Check various Python 3.12 locations
-for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
+for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12 python3; do
    if command -v "$cmd" &>/dev/null; then
        PYTHON_CMD="$cmd"
        break
@ -95,9 +123,15 @@ for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
 done

 if [ -z "$PYTHON_CMD" ]; then
-    warn "Python 3.12 not found. Installing via Homebrew..."
-    brew install python@3.12
-    PYTHON_CMD="/opt/homebrew/bin/python3.12"
+    if $IS_MAC; then
+        warn "Python 3.12 not found. Installing via Homebrew..."
+        brew install python@3.12
+        PYTHON_CMD="/opt/homebrew/bin/python3.12"
+    else
+        warn "Python 3.12 not found. Installing via apt..."
+        sudo apt update && sudo apt install -y python3.12 python3.12-venv python3-pip
+        PYTHON_CMD="python3.12"
+    fi
 fi

 PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
@ -123,7 +157,15 @@ if "$VENV/bin/python" -c "import snac" &>/dev/null; then
 else
    echo "Installing packages (this may take a few minutes)..."
    "$VENV/bin/pip" install -U pip --quiet
-    "$VENV/bin/pip" install -U snac qwen-tts --quiet
+    if $IS_MAC; then
+        # macOS: default PyTorch includes MPS support
+        "$VENV/bin/pip" install -U snac qwen-tts --quiet
+    else
+        # Linux: install PyTorch with CUDA first, then snac/qwen-tts
+        echo "Installing PyTorch with CUDA support..."
+        "$VENV/bin/pip" install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --quiet
+        "$VENV/bin/pip" install -U snac qwen-tts --quiet
+    fi
    ok "Packages installed"
 fi

@ -144,7 +186,11 @@ step "SNAC 24kHz audio decoder (~76 MB)"
 mkdir -p "$MODELS_DIR/snac_24khz"

 if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
-    SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
+    if $IS_MAC; then
+        SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
+    else
+        SIZE=$(stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
+    fi
    if [ "$SIZE" -gt 1000000 ]; then
        ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
    else
@ -247,7 +293,11 @@ du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/  /'
 echo ""
 echo "Test commands:"
 echo "  $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
-echo "  afplay test_orpheus_tara.wav"
+if $IS_MAC; then
+    echo "  afplay test_orpheus_tara.wav"
+else
+    echo "  aplay test_orpheus_tara.wav  (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)"
+fi
 if [ -d "$QWEN_MODEL_DIR" ]; then
    echo "  $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
 fi
--- a/__LOCAL_LLMs/test_orpheus_tts.py
+++ b/__LOCAL_LLMs/test_orpheus_tts.py
@ -13,6 +13,7 @@ Usage:
 """
 import os
 import re
+import sys
 import time
 import json
 import struct
@ -166,7 +167,7 @@ def main():

    # Voices: tara, leah, jess, leo, dan, mia, zac, zoe
    tests = [
-        ("Hello! This is Orpheus text to speech, running entirely on your Mac through Ollama.", "tara"),
+        ("Hello! This is Orpheus text to speech, running entirely locally through Ollama.", "tara"),
        ("<laugh> That's amazing! Local AI speech generation without any cloud services!", "leo"),
    ]

@ -182,7 +183,10 @@ def main():
            save_wav(audio, sr, outpath)

    print("\n=== Done! Open the .wav files to listen. ===")
-    print("Play with:  afplay test_orpheus_tara.wav")
+    if sys.platform == "darwin":
+        print("Play with:  afplay test_orpheus_tara.wav")
+    else:
+        print("Play with:  aplay test_orpheus_tara.wav  (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)")


 if __name__ == "__main__":
--- a/__LOCAL_LLMs/test_qwen_tts.py
+++ b/__LOCAL_LLMs/test_qwen_tts.py
@ -1,5 +1,5 @@
 """
-Test Qwen3-TTS 0.6B on Apple Silicon (MPS or CPU fallback).
+Test Qwen3-TTS 0.6B (CUDA, MPS, or CPU fallback).

 Prerequisites:
  bash setup-tts.sh              (one-shot: installs everything)
@ -24,8 +24,12 @@ if not os.path.isdir(MODEL_PATH):
    print("Run: bash setup-tts.sh   (or: bash download-tts-models.sh qwen)")
    raise SystemExit(1)

-# Pick device: MPS if available, else CPU
-if torch.backends.mps.is_available():
+# Pick device: CUDA > MPS > CPU
+if torch.cuda.is_available():
+    device = "cuda"
+    dtype = torch.float16
+    print(f"Using CUDA ({torch.cuda.get_device_name(0)})")
+elif torch.backends.mps.is_available():
    device = "mps"
    dtype = torch.float32  # MPS doesn't support bfloat16
    print(f"Using MPS (Apple Metal GPU)")
@ -48,7 +52,7 @@ print(f"Supported speakers: {model.get_supported_speakers()}")
 print(f"Supported languages: {model.get_supported_languages()}")

 # Test 1: English with a built-in speaker
-text = "Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your Mac."
+text = f"Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your {'Mac' if device == 'mps' else 'machine'} using {device.upper()}."
 print(f"\nGenerating speech for: {text[:60]}...")

 t1 = time.time()
--- a/__LOCAL_LLMs/windows_specific/setup-guide.md
+++ b/__LOCAL_LLMs/windows_specific/setup-guide.md
@ -1,372 +1,250 @@
 # Windows Setup Guide — Local LLM Stack on Razer Blade 18

 > **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
-> **OS:** Windows 11 Home
+> **OS:** Windows 11 Home + WSL2 (Ubuntu)
 > **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
 > **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs

 ---

-## Prerequisites
+## Architecture: Windows-Native + WSL2

-### 1. Windows Package Manager
-
-Install **winget** (ships with Windows 11) and optionally **Scoop** for CLI tools:
-
-```powershell
-# Verify winget
-winget --version
-
-# Install Scoop (optional, useful for dev tools)
-Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
-Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
+```
+┌────────────────────────────────────────────────────────┐
+│  Windows 11                                            │
+│  ├── NVIDIA drivers + CUDA (native)                    │
+│  ├── Ollama (native Windows service, port 11434)       │
+│  └── Browser → http://localhost:3000                   │
+│                                                        │
+│  ┌──────────────────────────────────────────────────┐  │
+│  │  WSL2 (Ubuntu 24.04)                             │  │
+│  │  ├── Node.js, Python 3.12, ffmpeg, git           │  │
+│  │  ├── __LOCAL_LLMs/ (cloned here)                 │  │
+│  │  │   ├── dashboard/ → npm run dev (port 3000)    │  │
+│  │  │   ├── setup-tts.sh    (works as-is)           │  │
+│  │  │   ├── start-dashboard.sh (works as-is)        │  │
+│  │  │   └── models/ (SNAC, Qwen3-TTS)              │  │
+│  │  ├── whisper-cpp (CUDA build)                    │  │
+│  │  └── .venv-qwen-tts/ (PyTorch CUDA)             │  │
+│  └──────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────┘
 ```

-### 2. NVIDIA CUDA Toolkit
+**Why WSL2?** All existing bash scripts, Python venvs, and Node.js tooling work identically to macOS — zero porting. The dashboard API routes auto-detect macOS vs Linux at runtime via `process.platform`.

-The RTX 5090 needs the latest CUDA drivers for GPU-accelerated inference.
+---
+
+## Phase 1: Windows-Native Setup
+
+### 1. NVIDIA Drivers

 ```powershell
-# Install NVIDIA drivers (latest Game Ready or Studio)
-winget install --id Nvidia.GeForceExperience
-
-# Install CUDA Toolkit (required for PyTorch CUDA)
-winget install --id Nvidia.CUDA
-# Or download from: https://developer.nvidia.com/cuda-downloads
+# Install latest NVIDIA Game Ready or Studio drivers
+# Download from: https://www.nvidia.com/Download/index.aspx

 # Verify
 nvidia-smi
+# Should show: RTX 5090, 24 GB VRAM, CUDA 13.x+
 ```

-Expected output should show:
+### 2. Ollama (Windows-Native)

- **RTX 5090** with **24 GB** VRAM
- CUDA version 13.x+
-
-### 3. Node.js (for Mission Control Dashboard)
-
-```powershell
-winget install --id OpenJS.NodeJS.LTS
-# Verify
-node --version   # should be 20.x+
-npm --version
-```
-
-### 4. Python 3.12
-
-```powershell
-winget install --id Python.Python.3.12
-# Verify
-python --version
-pip --version
-```
-
-### 5. Git
-
-```powershell
-winget install --id Git.Git
-```
-
-### 6. ffmpeg
-
-```powershell
-winget install --id Gyan.FFmpeg
-# Or: scoop install ffmpeg
-```
-
---
-
-## 1. Ollama — LLM Server
-
-### Install
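+Ollama runs natively on Windows. From WSL2 it is reachable at `localhost:11434` when WSL2 mirrored networking is enabled; with the default NAT networking, use the Windows host address instead (see Troubleshooting).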

 ```powershell
 winget install --id Ollama.Ollama
-```
-
-Ollama for Windows runs as a background service and automatically uses CUDA (RTX 5090).
-
-### Verify
-
-```powershell
-ollama --version
-curl http://localhost:11434/api/tags
-```
-
-### Download Models
-
-```powershell
-# Coding
-ollama pull qwen2.5-coder:32b     # 19 GB — primary coding model
-ollama pull qwen2.5-coder:7b      # 4.7 GB — fast coding
-
-# Reasoning
-ollama pull deepseek-r1:32b       # 19 GB — chain-of-thought
-
-# General
-ollama pull llama3.1:8b            # 4.9 GB — fast general tasks
-
-# TTS
-ollama pull sematre/orpheus:en    # 4 GB — text-to-speech (8 voices)

 # Verify
-ollama list
+ollama --version
 ```

-> **Note:** With 24 GB VRAM, Ollama will offload 32B models almost entirely to GPU.
-> On macOS (48 GB unified), the 32B models run in shared CPU/GPU memory.
-> On this machine, **GPU inference will be significantly faster** for models that fit in 24 GB VRAM.
+### 3. Pull Models (from Windows or WSL2)

-### VRAM Budget (RTX 5090 — 24 GB)
+```bash
+ollama pull qwen2.5-coder:32b     # 19 GB — primary coding model
+ollama pull qwen2.5-coder:7b      # 4.7 GB — fast coding
+ollama pull deepseek-r1:32b       # 19 GB — chain-of-thought
+ollama pull llama3.1:8b            # 4.9 GB — fast general tasks
+ollama pull sematre/orpheus:en    # 4 GB — text-to-speech (8 voices)

-| Model                        | VRAM Usage | Fits in GPU? |
-| ---------------------------- | ---------- | ------------ |
-| llama3.1:8b                  | ~5 GB      | ✅ Fully     |
-| qwen2.5-coder:7b             | ~5 GB      | ✅ Fully     |
-| sematre/orpheus:en           | ~4 GB      | ✅ Fully     |
-| qwen2.5-coder:32b            | ~19 GB     | ✅ Fully     |
-| deepseek-r1:32b              | ~19 GB     | ✅ Fully     |
-| Two 7B models simultaneously | ~10 GB     | ✅ Both fit  |
+ollama list    # verify all 5 models
+```
+
+### 4. Install WSL2
+
+```powershell
+# From PowerShell (Admin)
+wsl --install -d Ubuntu-24.04
+# Reboot if prompted, then set up username/password
+```

 ---

-## 2. Whisper.cpp — Speech-to-Text
+## Phase 2: WSL2 Setup

-### Option A: Pre-built Binary (Recommended)
+### 1. Install Dependencies

-Download the latest release from GitHub:
+```bash
+# Update
+sudo apt update && sudo apt upgrade -y

-```powershell
-# Create whisper directory
-mkdir "$env:USERPROFILE\whisper-cpp"
-cd "$env:USERPROFILE\whisper-cpp"
+# Node.js 20 LTS
+curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
+sudo apt install -y nodejs

-# Download latest release (CUDA build)
-# Check: https://github.com/ggerganov/whisper.cpp/releases
-# Look for: whisper-cublas-bin-x64.zip or whisper-cuda-bin-x64.zip
+# Python 3.12
+sudo apt install -y python3.12 python3.12-venv python3-pip
+
+# Build tools + ffmpeg
+sudo apt install -y ffmpeg git curl build-essential cmake
+
+# Verify
+node --version        # 20.x+
+python3.12 --version
+nvidia-smi            # should show RTX 5090 (GPU passthrough from Windows)
 ```

-### Option B: Build from Source (CUDA)
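+> **Important:** Do NOT install NVIDIA drivers inside WSL2. The Windows-side driver handles GPU passthrough automatically. The CUDA build of whisper.cpp below does need the CUDA Toolkit (`nvcc`) inside WSL2 — install NVIDIA's WSL-Ubuntu `cuda-toolkit` package, which ships without a driver.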

-```powershell
+### 2. Clone Repo
+
+```bash
+mkdir -p ~/code/mygh && cd ~/code/mygh
+git clone https://github.com/saravanakumardb1/learning_ai_common_plat.git
+cd learning_ai_common_plat/__LOCAL_LLMs
+```
+
+> **Performance note:** Always clone inside WSL2 filesystem (`~/code/...`), NOT in `/mnt/c/` — the Windows filesystem bridge is very slow for `node_modules`.
+
+### 3. Whisper.cpp (CUDA build)
+
+```bash
+cd ~
 git clone https://github.com/ggerganov/whisper.cpp.git
 cd whisper.cpp
 cmake -B build -DGGML_CUDA=ON
-cmake --build build --config Release
-```
+cmake --build build --config Release -j$(nproc)
+sudo cp build/bin/whisper-cli /usr/local/bin/

-### Download Whisper Model
-
-```powershell
-mkdir "$env:USERPROFILE\whisper-models"
-
-# Download ggml-large-v3-turbo (1.5 GB)
-curl -L -o "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" `
+# Download model (1.5 GB)
+mkdir -p ~/whisper-models
+curl -L -o ~/whisper-models/ggml-large-v3-turbo.bin \
  "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
+
+# Verify
+whisper-cli --help        # prints usage when the binary is installed and on PATH
 ```

 > **No corporate proxy on this machine** — download directly from `huggingface.co`.
-> The `hf-mirror.com` workaround is only needed on the corporate MacBook.

-### Verify
+### 4. TTS Setup (One-Shot)

-```powershell
-# Test transcription
-whisper-cli -m "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" -f test.wav
+```bash
+cd ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs
+
+# Works exactly like macOS — downloads SNAC, Qwen3-TTS, creates venv
+bash setup-tts.sh
 ```

+The script detects macOS vs Linux and installs the correct PyTorch variant (MPS on macOS, CUDA on Linux). On a personal machine, override the default HuggingFace mirror: `HF_MIRROR=https://huggingface.co bash setup-tts.sh`
+
+### 5. Start Dashboard
+
+```bash
+bash start-dashboard.sh
+# Open http://localhost:3000 in Windows browser
+```
+
+WSL2 automatically forwards ports — the dashboard is accessible from Windows at `localhost:3000`.
+
 ---

-## 3. TTS — Orpheus + Qwen3-TTS
+## Key Differences: macOS vs WSL2

-### 3a. Orpheus TTS (via Ollama)
-
-Already handled in Step 1 (`ollama pull sematre/orpheus:en`).
-
-### 3b. SNAC Decoder
-
-```powershell
-# Create models directory (match macOS layout)
-$MODELS = "$PSScriptRoot\models"   # or wherever you clone the repo
-mkdir "$MODELS\snac_24khz" -Force
-
-# Download SNAC decoder
-curl -L -o "$MODELS\snac_24khz\config.json" `
-  "https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/config.json"
-curl -L -o "$MODELS\snac_24khz\pytorch_model.bin" `
-  "https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
-```
-
-### 3c. Python Venv + Dependencies
-
-```powershell
-cd __LOCAL_LLMs
-
-# Create venv
-python -m venv .venv-qwen-tts
-
-# Activate (Windows uses Scripts, not bin)
-.\.venv-qwen-tts\Scripts\Activate.ps1
-
-# Install PyTorch with CUDA (NOT MPS — that's Apple only)
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
-
-# Install other deps
-pip install snac numpy soundfile
-
-# Verify CUDA
-python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, Device: {torch.cuda.get_device_name(0)}')"
-# Expected: CUDA: True, Device: NVIDIA GeForce RTX 5090 Laptop GPU
-```
-
-### 3d. Qwen3-TTS 0.6B
-
-```powershell
-$MODELS = ".\models"
-
-# Tokenizer (~650 MB)
-mkdir "$MODELS\Qwen3-TTS-Tokenizer-12Hz" -Force
-foreach ($f in @("config.json", "configuration.json", "preprocessor_config.json")) {
-    curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\$f" `
-      "https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f"
-}
-curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\model.safetensors" `
-  "https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
-
-# Model weights (~1.8 GB)
-mkdir "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice" -Force
-foreach ($f in @("config.json", "generation_config.json")) {
-    curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\$f" `
-      "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f"
-}
-curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\model.safetensors" `
-  "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
-```
-
-### 3e. Test TTS
-
-```powershell
-# Activate venv
-.\.venv-qwen-tts\Scripts\Activate.ps1
-
-# Orpheus TTS test
-python test_orpheus_tts.py
-
-# Qwen3-TTS test
-python test_qwen_tts.py
-```
-
-> **Key difference from macOS:** Qwen3-TTS will use **CUDA** instead of MPS.
-> In `test_qwen_tts.py`, the device selection `torch.device("mps")` will fall through to CUDA automatically
-> since `torch.backends.mps.is_available()` returns False on Windows.
-> You may want to update the device logic to prefer CUDA:
->
-> ```python
-> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-> ```
-
---
-
-## 4. Mission Control Dashboard
-
-```powershell
-cd __LOCAL_LLMs\dashboard
-
-# Install dependencies
-npm install
-
-# Start dev server
-npm run dev
-# Open http://localhost:3000
-```
-
-The dashboard is pure Next.js — works identically on Windows. The API routes auto-detect:
-
- **Ollama** at `localhost:11434`
- **Whisper** models in `%USERPROFILE%\whisper-models\`
- **TTS** engines (Orpheus, Qwen3-TTS) and Python venv
-
-### Start Script (PowerShell)
-
-Use the bash script equivalent:
-
-```powershell
-# Quick start (manual)
-ollama serve    # if not already running as service
-cd __LOCAL_LLMs\dashboard
-npm run dev
-```
-
-> TODO: Create `start-dashboard.ps1` as a PowerShell equivalent of `start-dashboard.sh`
-
---
-
-## 5. Key Differences: macOS vs Windows
-
-| Area                | macOS (M4 Pro 48 GB)                | Windows (Razer Blade 18)              |
-| ------------------- | ----------------------------------- | ------------------------------------- |
-| **GPU**             | Apple Silicon (unified memory, MPS) | RTX 5090 (24 GB VRAM, CUDA)           |
-| **Ollama GPU**      | Automatic (Metal)                   | Automatic (CUDA)                      |
-| **VRAM**            | Shared from 48 GB RAM               | Dedicated 24 GB GDDR7                 |
-| **PyTorch device**  | `mps`                               | `cuda`                                |
-| **Whisper install** | `brew install whisper-cpp`          | Build from source or download release |
-| **Python venv**     | `bin/activate`                      | `Scripts\Activate.ps1`                |
-| **Package manager** | Homebrew                            | winget / scoop                        |
-| **Shell**           | zsh / bash                          | PowerShell / cmd                      |
-| **Scripts**         | `.sh` (bash)                        | `.ps1` (PowerShell)                   |
-| **Model download**  | `hf-mirror.com` (corporate proxy)   | `huggingface.co` (no proxy)           |
-| **Dashboard**       | Identical                           | Identical                             |
-| **Ollama models**   | Identical                           | Identical                             |
+| Area                   | macOS (any Mac)             | WSL2 (any Linux)                       |
+| ---------------------- | --------------------------- | -------------------------------------- |
+| **GPU**                | Apple Silicon (MPS)         | NVIDIA (CUDA)                          |
+| **Ollama**             | macOS native (Metal)        | Windows native, accessed via localhost |
+| **PyTorch device**     | `mps`                       | `cuda`                                 |
+| **Whisper install**    | `brew install whisper-cpp`  | Build from source with CUDA            |
+| **Package manager**    | Homebrew                    | apt                                    |
+| **Shell scripts**      | Work as-is                  | Work as-is                             |
+| **Python venv path**   | `bin/python`                | `bin/python` (same)                    |
+| **Dashboard**          | Identical                   | Identical                              |
+| **Ollama models path** | `~/.ollama/models/`         | Windows `%USERPROFILE%\.ollama\models` |
+| **Model download**     | `hf-mirror.com` (corporate) | `huggingface.co` (direct)              |

 ### Performance Expectations

-| Workload                    | macOS M4 Pro 48 GB           | Razer RTX 5090 24 GB      |
-| --------------------------- | ---------------------------- | ------------------------- |
-| qwen2.5-coder:32b inference | ~15–25 tok/s (MPS/CPU blend) | ~40–60 tok/s (full CUDA)  |
-| Whisper large-v3-turbo      | ~2–4x realtime (CPU)         | ~8–15x realtime (CUDA)    |
-| Orpheus TTS                 | ~realtime (CPU decode)       | ~2–3x realtime (CUDA)     |
-| Qwen3-TTS                   | ~realtime (MPS)              | ~2–4x realtime (CUDA)     |
-| 70B quantized models        | Fits in 48 GB (slow)         | Partially offloads to RAM |
+| Workload                    | macOS M4 Pro 48 GB   | Razer RTX 5090 24 GB            |
+| --------------------------- | -------------------- | ------------------------------- |
+| qwen2.5-coder:32b inference | ~15–25 tok/s         | ~40–60 tok/s                    |
+| Whisper large-v3-turbo      | ~2–4x realtime       | ~8–15x realtime                 |
+| Orpheus TTS                 | ~realtime            | ~2–3x realtime                  |
+| Qwen3-TTS                   | ~realtime (MPS)      | ~2–4x realtime (CUDA)           |
+| 70B quantized models        | Fits in 48 GB (slow) | Partially offloads to 64 GB RAM |
+
+### VRAM Budget (RTX 5090 — 24 GB)
+
+| Model              | VRAM Usage | Fits in GPU? |
+| ------------------ | ---------- | ------------ |
+| llama3.1:8b        | ~5 GB      | ✅ Fully     |
+| qwen2.5-coder:7b   | ~5 GB      | ✅ Fully     |
+| sematre/orpheus:en | ~4 GB      | ✅ Fully     |
+| qwen2.5-coder:32b  | ~19 GB     | ✅ Fully     |
+| deepseek-r1:32b    | ~19 GB     | ✅ Fully     |

 ---

-## 6. File Layout (Same as macOS)
+## Quick Reference — Full Setup Checklist
+
+### Windows Side

 ```
-__LOCAL_LLMs/
-├── dashboard/                       ← Mission Control (port 3000) — works as-is
-├── models/                          ← TTS model weights (gitignored)
-│   ├── snac_24khz/
-│   ├── Qwen3-TTS-Tokenizer-12Hz/
-│   └── Qwen3-TTS-12Hz-0.6B-CustomVoice/
-├── .venv-qwen-tts/                  ← Python venv (Scripts\ on Windows)
-├── test_orpheus_tts.py              ← works as-is (device fallback)
-├── test_qwen_tts.py                 ← update device to prefer CUDA
-├── windows_specific/
-│   ├── razer-blade-18-spec.md       ← hardware spec
-│   └── setup-guide.md              ← this file
-└── docs/                            ← macOS-focused docs (still useful as reference)
-```
-
---
-
-## 7. Quick Reference — Full Setup Checklist
-
-```
-[ ] Install NVIDIA drivers + CUDA Toolkit
+[ ] Install NVIDIA drivers (Game Ready or Studio)
 [ ] Install Ollama (winget install Ollama.Ollama)
-[ ] Pull models: qwen2.5-coder:32b, deepseek-r1:32b, llama3.1:8b, orpheus
-[ ] Install Node.js 20+ (winget)
-[ ] Install Python 3.12 (winget)
-[ ] Install Git (winget)
-[ ] Install ffmpeg (winget)
-[ ] Clone repo
-[ ] Download Whisper model to %USERPROFILE%\whisper-models\
-[ ] Build or download whisper-cpp with CUDA
-[ ] Create Python venv + install PyTorch CUDA + snac
-[ ] Download SNAC decoder
-[ ] Download Qwen3-TTS tokenizer + model
-[ ] npm install in dashboard/
-[ ] Run dashboard: npm run dev
+[ ] Pull all 5 models
+[ ] Install WSL2 (wsl --install -d Ubuntu-24.04)
+```
+
+### WSL2 Side
+
+```
+[ ] Install Node.js 20+, Python 3.12, ffmpeg, git, cmake
+[ ] Verify nvidia-smi shows RTX 5090
+[ ] Clone repo into ~/code/mygh/
+[ ] Build whisper-cpp with CUDA
+[ ] Download Whisper model to ~/whisper-models/
+[ ] Run: bash setup-tts.sh
+[ ] Run: bash start-dashboard.sh
 [ ] Verify: http://localhost:3000 shows all green
 ```
+
+---
+
+## Troubleshooting
+
+### Ollama not accessible from WSL2
+
+```bash
+curl http://localhost:11434/api/tags
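+# If it fails: with WSL2's default NAT networking, localhost inside WSL2 does not reach Windows services.
+# Option A: enable mirrored networking — add "networkingMode=mirrored" under [wsl2] in %USERPROFILE%\.wslconfig, then run: wsl --shutdown
+# Option B: set OLLAMA_HOST=0.0.0.0 on Windows, restart Ollama, check the Windows firewall allows port 11434, then try:
+curl http://$(hostname).local:11434/api/tags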
+```
+
+### CUDA not visible in WSL2
+
+```bash
+nvidia-smi
+# If "command not found":
+# 1. Update Windows NVIDIA drivers to latest
+# 2. Run: wsl --update
+# 3. Do NOT install nvidia-driver-* inside WSL2
+```
+
+### Slow filesystem performance
+
+```bash
+# Clone repos inside WSL2 filesystem: ~/code/...
+# NOT in /mnt/c/ (Windows→WSL bridge is ~10x slower for node_modules)
+```