fix(local-llms): cross-platform audit — 8 bugs/gaps fixed

- setup-tts.sh: make fully cross-platform (macOS + Linux/WSL2)
  - OS detection, apt fallback, CUDA PyTorch install, nvidia-smi check
  - cross-platform playback hints, HF_MIRROR env override
- api/system/route.ts: fix ffmpeg detection (use -version not --version)
- api/system/memory/route.ts: remove unused total variable in Linux path
- api/system/exec/route.ts: expand allowlist with Linux commands
  (head, tail, grep, which, ps, uname, free, lscpu, nvidia-smi, etc.)
- api/tts/route.ts: cross-platform venv path + CUDA/MPS label
- api/whisper/route.ts: Linux binary/model paths
- api/ollama/logs/route.ts: Linux log paths + WSL2 hint
- test_qwen_tts.py: platform-aware speech text + CUDA device detection
- test_orpheus_tts.py: platform-aware text, move import sys to top
- setup-guide.md: fix false auto-detect claim, add HF_MIRROR hint
This commit is contained in:
saravanakumardb1 2026-02-21 15:27:49 -08:00
parent f85b455eb5
commit b1d2e4ec81
10 changed files with 518 additions and 440 deletions

View File

@ -4,11 +4,15 @@ import { homedir } from 'os';
import { join } from 'path';
import { existsSync } from 'fs';
const IS_MAC = process.platform === 'darwin';
export async function GET() {
const logPaths = [
join(homedir(), '.ollama', 'logs', 'server.log'),
join(homedir(), '.ollama', 'logs', 'gpu.log'),
'/tmp/ollama.log',
// Linux / WSL2 — only exists if `ollama serve` output was redirected to this file
// (journald keeps its own journal and does not write here)
'/var/log/ollama.log',
];
for (const logPath of logPaths) {
@ -25,11 +29,13 @@ export async function GET() {
}
}
// On macOS, Ollama logs via unified logging
// Fallback: platform-specific logging hint
const hint = IS_MAC
? 'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m'
: 'Ollama logs not found. If running on Windows (accessed via WSL2), check Windows Event Viewer or: journalctl -u ollama --no-pager -n 50';
return NextResponse.json({
lines: [
'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m',
],
lines: [hint],
path: 'system',
total: 1,
});

View File

@ -5,9 +5,9 @@ import { promisify } from 'util';
const execFileAsync = promisify(execFile);
const COMMAND_ALLOWLIST = new Set([
// Cross-platform (macOS + Linux)
'git',
'npm',
'brew',
'cat',
'ls',
'wc',
@ -15,6 +15,21 @@ const COMMAND_ALLOWLIST = new Set([
'df',
'echo',
'date',
'head',
'tail',
'grep',
'which',
'ps',
'uname',
'whoami',
// macOS
'brew',
// Linux / WSL2
'free',
'lscpu',
'nvidia-smi',
'dpkg',
'apt',
]);
export async function POST(request: NextRequest) {

View File

@ -1,10 +1,13 @@
import { NextResponse } from 'next/server';
import { exec } from 'child_process';
import { promisify } from 'util';
import { readFile } from 'fs/promises';
import os from 'os';
const execAsync = promisify(exec);
const IS_MAC = process.platform === 'darwin';
interface ProcessInfo {
pid: number;
name: string;
@ -26,7 +29,7 @@ interface VmStatBreakdown {
async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
try {
// ps with RSS in KB, sorted descending by RSS
// ps with RSS in KB, sorted descending by RSS — works on both macOS and Linux
const { stdout } = await execAsync(
`ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -${limit}`,
{ timeout: 3000 }
@ -60,36 +63,67 @@ async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
}
async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
try {
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
const parse = (label: string): number => {
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * pageSize : 0;
};
return {
active: parse('Pages active'),
wired: parse('Pages wired down'),
compressor: parse('Pages occupied by compressor'),
inactive: parse('Pages inactive'),
purgeable: parse('Pages purgeable'),
speculative: parse('Pages speculative'),
free: parse('Pages free'),
pageSize,
};
} catch {
return {
active: 0,
wired: 0,
compressor: 0,
inactive: 0,
purgeable: 0,
speculative: 0,
free: 0,
pageSize: 16384,
};
if (IS_MAC) {
try {
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
const parse = (label: string): number => {
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * pageSize : 0;
};
return {
active: parse('Pages active'),
wired: parse('Pages wired down'),
compressor: parse('Pages occupied by compressor'),
inactive: parse('Pages inactive'),
purgeable: parse('Pages purgeable'),
speculative: parse('Pages speculative'),
free: parse('Pages free'),
pageSize,
};
} catch {
// fall through to zeros
}
} else {
// Linux / WSL2 — parse /proc/meminfo into vm_stat-compatible structure
try {
const raw = await readFile('/proc/meminfo', 'utf-8');
const parse = (key: string): number => {
const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * 1024 : 0;
};
const free = parse('MemFree');
const buffers = parse('Buffers');
const cached = parse('Cached');
const sReclaimable = parse('SReclaimable');
const active = parse('Active');
return {
active,
wired: buffers, // closest analogy
compressor: parse('SwapCached'),
inactive: parse('Inactive'),
purgeable: sReclaimable,
speculative: 0,
free,
pageSize: 4096,
};
} catch {
// fall through to zeros
}
}
return {
active: 0,
wired: 0,
compressor: 0,
inactive: 0,
purgeable: 0,
speculative: 0,
free: 0,
pageSize: IS_MAC ? 16384 : 4096,
};
}
export async function GET() {

View File

@ -1,11 +1,14 @@
import { NextResponse } from 'next/server';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import { readFile } from 'fs/promises';
import os from 'os';
const execAsync = promisify(exec);
const execFileAsync = promisify(execFile);
const IS_MAC = process.platform === 'darwin';
// Cache slow commands with TTL
let staticCache: {
chip: string;
@ -20,10 +23,17 @@ const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
async function getChipInfo(): Promise<string> {
try {
if (IS_MAC) {
const { stdout } = await execAsync(
"sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
);
return stdout.trim();
}
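// Linux / WSL2
// NOTE(review): the `||` fallback to /proc/cpuinfo only runs if the last command of the
// first pipeline (sed) fails; a missing lscpu just yields empty output — confirm intent.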
const { stdout } = await execAsync(
"sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
"lscpu 2>/dev/null | grep 'Model name' | sed 's/.*:\\s*//' || cat /proc/cpuinfo | grep 'model name' | head -1 | sed 's/.*: //'"
);
return stdout.trim();
return stdout.trim() || 'Unknown';
} catch {
return 'Unknown';
}
@ -55,30 +65,63 @@ async function getOllamaModelsDiskUsage(): Promise<number> {
async function getGpuInfo(): Promise<string> {
try {
if (IS_MAC) {
const { stdout } = await execAsync(
"system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
{ timeout: 5000 }
);
return stdout.trim() || 'Apple Silicon (integrated)';
}
// Linux / WSL2 — try nvidia-smi first, fall back to lspci
try {
const { stdout } = await execAsync(
'nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1',
{ timeout: 3000 }
);
if (stdout.trim()) return stdout.trim();
} catch {
/* no nvidia-smi */
}
const { stdout } = await execAsync(
"system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
{ timeout: 5000 }
"lspci 2>/dev/null | grep -i 'vga\\|3d' | sed 's/.*: //' | head -1"
);
return stdout.trim() || 'Apple Silicon (integrated)';
return stdout.trim() || 'Unknown';
} catch {
return 'Apple Silicon (integrated)';
return IS_MAC ? 'Apple Silicon (integrated)' : 'Unknown';
}
}
async function getBrewPackages(): Promise<Array<{ name: string; version: string }>> {
const targets = ['ollama', 'whisper-cpp', 'ffmpeg'];
const results: Array<{ name: string; version: string }> = [];
for (const pkg of targets) {
try {
const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
timeout: 3000,
});
const parts = stdout.trim().split(' ');
if (parts.length >= 2) {
results.push({ name: parts[0], version: parts.slice(1).join(' ') });
if (IS_MAC) {
for (const pkg of targets) {
try {
const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
timeout: 3000,
});
const parts = stdout.trim().split(' ');
if (parts.length >= 2) {
results.push({ name: parts[0], version: parts.slice(1).join(' ') });
}
} catch {
// not installed
}
}
} else {
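// Linux / WSL2 — check via version commands (ffmpeg uses -version, others use --version).
// NOTE(review): with `2>&1 | head -1` a missing binary still produces output (the shell's
// "not found" message) and exit status 0, so it may be listed as installed — confirm.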
for (const pkg of targets) {
const bin = pkg === 'whisper-cpp' ? 'whisper-cli' : pkg;
const flag = bin === 'ffmpeg' ? '-version' : '--version';
try {
const { stdout } = await execAsync(`${bin} ${flag} 2>&1 | head -1`, { timeout: 3000 });
if (stdout.trim()) {
results.push({ name: pkg, version: stdout.trim() });
}
} catch {
// not installed
}
} catch {
// not installed
}
}
return results;
@ -106,7 +149,8 @@ async function getCachedOllamaDiskUsage(): Promise<number> {
return value;
}
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
// macOS: vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
// Linux: /proc/meminfo gives accurate breakdown
async function getAccurateMemory(): Promise<{
total: number;
appMemory: number;
@ -115,42 +159,66 @@ async function getAccurateMemory(): Promise<{
pressure: string;
}> {
const totalMem = os.totalmem();
try {
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
const parse = (label: string): number => {
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * pageSize : 0;
};
const active = parse('Pages active');
const wired = parse('Pages wired down');
const inactive = parse('Pages inactive');
const purgeable = parse('Pages purgeable');
const speculative = parse('Pages speculative');
const free = parse('Pages free');
const compressor = parse('Pages occupied by compressor');
const appMemory = active + wired + compressor;
const cached = inactive + purgeable + speculative;
// Return raw free separately from cached — no overlap
// available for loading = free + cached (macOS reclaims cached on demand)
if (IS_MAC) {
try {
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
const parse = (label: string): number => {
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * pageSize : 0;
};
const active = parse('Pages active');
const wired = parse('Pages wired down');
const inactive = parse('Pages inactive');
const purgeable = parse('Pages purgeable');
const speculative = parse('Pages speculative');
const free = parse('Pages free');
const compressor = parse('Pages occupied by compressor');
const ratio = appMemory / totalMem;
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
const appMemory = active + wired + compressor;
const cached = inactive + purgeable + speculative;
return { total: totalMem, appMemory, cached, free, pressure };
} catch {
// Fallback to Node.js (inaccurate on macOS but works everywhere)
const freeMem = os.freemem();
return {
total: totalMem,
appMemory: totalMem - freeMem,
cached: 0,
free: freeMem,
pressure: 'unknown',
};
const ratio = appMemory / totalMem;
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
return { total: totalMem, appMemory, cached, free, pressure };
} catch {
// fall through to generic fallback
}
} else {
// Linux / WSL2 — parse /proc/meminfo
try {
const raw = await readFile('/proc/meminfo', 'utf-8');
const parse = (key: string): number => {
const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * 1024 : 0; // /proc/meminfo is in kB
};
const total = parse('MemTotal');
const free = parse('MemFree');
const buffers = parse('Buffers');
const cached = parse('Cached') + parse('SReclaimable') + buffers;
const appMemory = total - free - cached;
const ratio = appMemory / total;
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
return { total, appMemory, cached, free, pressure };
} catch {
// fall through to generic fallback
}
}
// Generic fallback (works everywhere but less accurate)
const freeMem = os.freemem();
return {
total: totalMem,
appMemory: totalMem - freeMem,
cached: 0,
free: freeMem,
pressure: 'unknown',
};
}
export async function GET() {

View File

@ -6,9 +6,14 @@ import { join, resolve } from 'path';
const execAsync = promisify(exec);
const IS_MAC = process.platform === 'darwin';
// process.cwd() = dashboard/, parent = __LOCAL_LLMs/
const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');
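// macOS/Linux venv layout: bin/python. A Windows-native venv would use Scripts/python.exe;
// that layout is not handled here (the dashboard is expected to run on macOS or Linux/WSL2).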
const VENV_PYTHON = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
interface TtsEngine {
name: string;
type: 'ollama' | 'python';
@ -67,8 +72,7 @@ async function checkOrpheus(): Promise<TtsEngine> {
const snacSize = hasSnac ? await getFileSize(snacPath) : 0;
// Check Python venv
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
const hasVenv = await fileExists(venvPython);
const hasVenv = await fileExists(VENV_PYTHON);
if (hasModel && hasSnac && hasVenv) {
engine.status = 'ready';
@ -114,13 +118,14 @@ async function checkQwenTts(): Promise<TtsEngine> {
}
const hasTokenizer = await fileExists(join(tokenizerDir, 'config.json'));
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
const hasVenv = await fileExists(venvPython);
const hasVenv = await fileExists(VENV_PYTHON);
if (hasModel && hasTokenizer && hasVenv) {
engine.status = 'ready';
engine.size = `${(modelSize / 1e9).toFixed(1)} GB`;
engine.details = '0.6B params · 10 languages · MPS/CPU';
engine.details = IS_MAC
? '0.6B params · 10 languages · MPS/CPU'
: '0.6B params · 10 languages · CUDA/CPU';
} else if (hasModel || hasTokenizer) {
engine.status = 'partial';
const missing: string[] = [];
@ -141,22 +146,21 @@ async function checkVenv(): Promise<{
python?: string;
packages?: string[];
}> {
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
const exists = await fileExists(venvPython);
const exists = await fileExists(VENV_PYTHON);
if (!exists) return { exists: false };
try {
const { stdout } = await execAsync(
`"${venvPython}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
`"${VENV_PYTHON}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
{ timeout: 5000 }
);
return {
exists: true,
python: venvPython,
python: VENV_PYTHON,
packages: stdout.trim().split(' '),
};
} catch {
return { exists: true, python: venvPython };
return { exists: true, python: VENV_PYTHON };
}
}

View File

@ -7,9 +7,22 @@ import { homedir } from 'os';
const execAsync = promisify(exec);
const IS_MAC = process.platform === 'darwin';
async function getWhisperBinaries(): Promise<string[]> {
try {
const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
if (IS_MAC) {
const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
return stdout
.trim()
.split('\n')
.filter(Boolean)
.map(p => p.split('/').pop() || p);
}
// Linux / WSL2 — check common locations
const { stdout } = await execAsync(
'ls /usr/local/bin/whisper-* /usr/bin/whisper-* 2>/dev/null || which whisper-cli 2>/dev/null'
);
return stdout
.trim()
.split('\n')
@ -25,7 +38,9 @@ const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '')
.filter(Boolean)
.concat([
join(homedir(), 'whisper-models'),
'/opt/homebrew/share/whisper-cpp/models',
...(IS_MAC
? ['/opt/homebrew/share/whisper-cpp/models']
: ['/usr/local/share/whisper-cpp/models', '/usr/share/whisper-cpp/models']),
join(homedir(), '.cache', 'whisper'),
]);

View File

@ -3,26 +3,24 @@
# TTS Setup — One-Shot Script for Fresh Laptop
#
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
# on Apple Silicon Macs. Works through corporate proxy.
# on macOS (Apple Silicon) or Linux (CUDA GPU / WSL2).
#
# What this does:
# 1. Installs Python 3.12 via Homebrew (if missing)
# 2. Creates Python venv with TTS packages
# 1. Installs Python 3.12 (Homebrew on macOS, apt on Linux)
# 2. Creates Python venv with TTS packages (MPS on macOS, CUDA on Linux)
# 3. Pulls Orpheus TTS model via Ollama
# 4. Downloads SNAC audio decoder via hf-mirror.com
# 5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
# 4. Downloads SNAC audio decoder
# 5. (Optional) Downloads Qwen3-TTS 0.6B
#
# Prerequisites:
# - macOS with Apple Silicon (M1/M2/M3/M4)
# - Homebrew installed
# - Ollama installed (brew install ollama)
# macOS: Homebrew + Ollama installed
# Linux: apt + Ollama accessible at localhost:11434
#
# Usage:
# bash setup-tts.sh
#
# After setup, test with:
# .venv-qwen-tts/bin/python test_orpheus_tts.py
# afplay test_orpheus_tara.wav
# ============================================================
set -e
@ -31,7 +29,13 @@ VENV="$SCRIPT_DIR/.venv-qwen-tts"
MODELS_DIR="$SCRIPT_DIR/models"
# HuggingFace mirror that works through corporate proxy
HF_MIRROR="https://hf-mirror.com"
# On personal machines, set HF_MIRROR=https://huggingface.co to download directly
HF_MIRROR="${HF_MIRROR:-https://hf-mirror.com}"
# Detect OS
OS_TYPE="$(uname -s)"
IS_MAC=false
[ "$OS_TYPE" = "Darwin" ] && IS_MAC=true
RED='\033[0;31m'
GREEN='\033[0;32m'
@ -52,34 +56,58 @@ echo ""
# ── 0. Check prerequisites ──────────────────────────────────
step "Checking prerequisites"
# Homebrew
if ! command -v brew &>/dev/null; then
fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
fi
ok "Homebrew"
if $IS_MAC; then
# Homebrew
if ! command -v brew &>/dev/null; then
fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
fi
ok "Homebrew"
# Ollama
if ! command -v ollama &>/dev/null; then
warn "Ollama not found. Installing..."
brew install ollama
# Ollama (install via Homebrew if missing)
if ! command -v ollama &>/dev/null; then
warn "Ollama not found. Installing..."
brew install ollama
fi
else
# Linux / WSL2 — Ollama should be installed on host or via install script
if ! command -v ollama &>/dev/null; then
# On WSL2 Ollama runs on the Windows side; check if reachable
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
fail "Ollama not found and not reachable at localhost:11434. Install Ollama on Windows or run: curl -fsSL https://ollama.com/install.sh | sh"
fi
ok "Ollama reachable at localhost:11434 (Windows host)"
fi
fi
ok "Ollama installed"
# Check if Ollama is running
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
warn "Ollama not running. Starting..."
ollama serve &>/dev/null &
sleep 3
if command -v ollama &>/dev/null; then
ollama serve &>/dev/null &
sleep 3
fi
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
fail "Could not start Ollama. Try manually: ollama serve"
fi
fi
ok "Ollama running on port 11434"
# Apple Silicon check
# GPU check
ARCH=$(uname -m)
if [ "$ARCH" != "arm64" ]; then
warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
if $IS_MAC; then
if [ "$ARCH" != "arm64" ]; then
warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
else
ok "Apple Silicon ($ARCH) — MPS acceleration available"
fi
else
if command -v nvidia-smi &>/dev/null; then
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
ok "NVIDIA GPU detected: $GPU_NAME — CUDA acceleration available"
else
warn "nvidia-smi not found. CUDA acceleration won't be available (CPU fallback)."
fi
fi
# ── 1. Install Python 3.12 ──────────────────────────────────
@ -87,7 +115,7 @@ step "Python 3.12"
PYTHON_CMD=""
# Check various Python 3.12 locations, then fall back to the generic python3 on PATH
# (which is not guaranteed to be 3.12)
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12 python3; do
if command -v "$cmd" &>/dev/null; then
PYTHON_CMD="$cmd"
break
@ -95,9 +123,15 @@ for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
done
if [ -z "$PYTHON_CMD" ]; then
warn "Python 3.12 not found. Installing via Homebrew..."
brew install python@3.12
PYTHON_CMD="/opt/homebrew/bin/python3.12"
if $IS_MAC; then
warn "Python 3.12 not found. Installing via Homebrew..."
brew install python@3.12
PYTHON_CMD="/opt/homebrew/bin/python3.12"
else
warn "Python 3.12 not found. Installing via apt..."
sudo apt update && sudo apt install -y python3.12 python3.12-venv python3-pip
PYTHON_CMD="python3.12"
fi
fi
PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
@ -123,7 +157,15 @@ if "$VENV/bin/python" -c "import snac" &>/dev/null; then
else
echo "Installing packages (this may take a few minutes)..."
"$VENV/bin/pip" install -U pip --quiet
"$VENV/bin/pip" install -U snac qwen-tts --quiet
if $IS_MAC; then
# macOS: default PyTorch includes MPS support
"$VENV/bin/pip" install -U snac qwen-tts --quiet
else
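# Linux: install PyTorch with CUDA first, then snac/qwen-tts
# NOTE(review): the cu124 wheel index below may not cover Blackwell GPUs (e.g. the RTX 5090
# that setup-guide.md targets), which need a CUDA 12.8+ build — confirm before relying on it.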
echo "Installing PyTorch with CUDA support..."
"$VENV/bin/pip" install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --quiet
"$VENV/bin/pip" install -U snac qwen-tts --quiet
fi
ok "Packages installed"
fi
@ -144,7 +186,11 @@ step "SNAC 24kHz audio decoder (~76 MB)"
mkdir -p "$MODELS_DIR/snac_24khz"
if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
if $IS_MAC; then
SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
else
SIZE=$(stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
fi
if [ "$SIZE" -gt 1000000 ]; then
ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
else
@ -247,7 +293,11 @@ du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
echo ""
echo "Test commands:"
echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
echo " afplay test_orpheus_tara.wav"
if $IS_MAC; then
echo " afplay test_orpheus_tara.wav"
else
echo " aplay test_orpheus_tara.wav (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)"
fi
if [ -d "$QWEN_MODEL_DIR" ]; then
echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
fi

View File

@ -13,6 +13,7 @@ Usage:
"""
import os
import re
import sys
import time
import json
import struct
@ -166,7 +167,7 @@ def main():
# Voices: tara, leah, jess, leo, dan, mia, zac, zoe
tests = [
("Hello! This is Orpheus text to speech, running entirely on your Mac through Ollama.", "tara"),
("Hello! This is Orpheus text to speech, running entirely locally through Ollama.", "tara"),
("<laugh> That's amazing! Local AI speech generation without any cloud services!", "leo"),
]
@ -182,7 +183,10 @@ def main():
save_wav(audio, sr, outpath)
print("\n=== Done! Open the .wav files to listen. ===")
print("Play with: afplay test_orpheus_tara.wav")
if sys.platform == "darwin":
print("Play with: afplay test_orpheus_tara.wav")
else:
print("Play with: aplay test_orpheus_tara.wav (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)")
if __name__ == "__main__":

View File

@ -1,5 +1,5 @@
"""
Test Qwen3-TTS 0.6B on Apple Silicon (MPS or CPU fallback).
Test Qwen3-TTS 0.6B (CUDA, MPS, or CPU fallback).
Prerequisites:
bash setup-tts.sh (one-shot: installs everything)
@ -24,8 +24,12 @@ if not os.path.isdir(MODEL_PATH):
print("Run: bash setup-tts.sh (or: bash download-tts-models.sh qwen)")
raise SystemExit(1)
# Pick device: MPS if available, else CPU
if torch.backends.mps.is_available():
# Pick device: CUDA > MPS > CPU
if torch.cuda.is_available():
device = "cuda"
dtype = torch.float16
print(f"Using CUDA ({torch.cuda.get_device_name(0)})")
elif torch.backends.mps.is_available():
device = "mps"
dtype = torch.float32 # MPS doesn't support bfloat16
print(f"Using MPS (Apple Metal GPU)")
@ -48,7 +52,7 @@ print(f"Supported speakers: {model.get_supported_speakers()}")
print(f"Supported languages: {model.get_supported_languages()}")
# Test 1: English with a built-in speaker
text = "Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your Mac."
text = f"Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your {'Mac' if device == 'mps' else 'machine'} using {device.upper()}."
print(f"\nGenerating speech for: {text[:60]}...")
t1 = time.time()

View File

@ -1,372 +1,250 @@
# Windows Setup Guide — Local LLM Stack on Razer Blade 18
> **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
> **OS:** Windows 11 Home
> **OS:** Windows 11 Home + WSL2 (Ubuntu)
> **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
> **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs
---
## Prerequisites
## Architecture: Windows-Native + WSL2
### 1. Windows Package Manager
Install **winget** (ships with Windows 11) and optionally **Scoop** for CLI tools:
```powershell
# Verify winget
winget --version
# Install Scoop (optional, useful for dev tools)
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
```
┌────────────────────────────────────────────────────────┐
│ Windows 11 │
│ ├── NVIDIA drivers + CUDA (native) │
│ ├── Ollama (native Windows service, port 11434) │
│ └── Browser → http://localhost:3000 │
│ │
│ ┌──────────────────────────────────────────────────┐ │
│ │ WSL2 (Ubuntu 24.04) │ │
│ │ ├── Node.js, Python 3.12, ffmpeg, git │ │
│ │ ├── __LOCAL_LLMs/ (cloned here) │ │
│ │ │ ├── dashboard/ → npm run dev (port 3000) │ │
│ │ │ ├── setup-tts.sh (works as-is) │ │
│ │ │ ├── start-dashboard.sh (works as-is) │ │
│ │ │ └── models/ (SNAC, Qwen3-TTS) │ │
│ │ ├── whisper-cpp (CUDA build) │ │
│ │ └── .venv-qwen-tts/ (PyTorch CUDA) │ │
│ └──────────────────────────────────────────────────┘ │
└────────────────────────────────────────────────────────┘
```
### 2. NVIDIA CUDA Toolkit
**Why WSL2?** All existing bash scripts, Python venvs, and Node.js tooling work identically to macOS — zero porting. The dashboard API routes auto-detect macOS vs Linux at runtime via `process.platform`.
The RTX 5090 needs the latest CUDA drivers for GPU-accelerated inference.
---
## Phase 1: Windows-Native Setup
### 1. NVIDIA Drivers
```powershell
# Install NVIDIA drivers (latest Game Ready or Studio)
winget install --id Nvidia.GeForceExperience
# Install CUDA Toolkit (required for PyTorch CUDA)
winget install --id Nvidia.CUDA
# Or download from: https://developer.nvidia.com/cuda-downloads
# Install latest NVIDIA Game Ready or Studio drivers
# Download from: https://www.nvidia.com/Download/index.aspx
# Verify
nvidia-smi
# Should show: RTX 5090, 24 GB VRAM, CUDA 13.x+
```
Expected output should show:
### 2. Ollama (Windows-Native)
- **RTX 5090** with **24 GB** VRAM
- CUDA version 13.x+
### 3. Node.js (for Mission Control Dashboard)
```powershell
winget install --id OpenJS.NodeJS.LTS
# Verify
node --version # should be 20.x+
npm --version
```
### 4. Python 3.12
```powershell
winget install --id Python.Python.3.12
# Verify
python --version
pip --version
```
### 5. Git
```powershell
winget install --id Git.Git
```
### 6. ffmpeg
```powershell
winget install --id Gyan.FFmpeg
# Or: scoop install ffmpeg
```
---
## 1. Ollama — LLM Server
### Install
Ollama runs natively on Windows and is accessible from WSL2 at `localhost:11434`.
```powershell
winget install --id Ollama.Ollama
```
Ollama for Windows runs as a background service and automatically uses CUDA (RTX 5090).
### Verify
```powershell
ollama --version
curl http://localhost:11434/api/tags
```
### Download Models
```powershell
# Coding
ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
# Reasoning
ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
# General
ollama pull llama3.1:8b # 4.9 GB — fast general tasks
# TTS
ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
# Verify
ollama list
ollama --version
```
> **Note:** With 24 GB VRAM, Ollama will offload 32B models almost entirely to GPU.
> On macOS (48 GB unified), the 32B models run in shared CPU/GPU memory.
> On this machine, **GPU inference will be significantly faster** for models that fit in 24 GB VRAM.
### 3. Pull Models (from Windows or WSL2)
### VRAM Budget (RTX 5090 — 24 GB)
```bash
ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
ollama pull llama3.1:8b # 4.9 GB — fast general tasks
ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
| Model | VRAM Usage | Fits in GPU? |
| ---------------------------- | ---------- | ------------ |
| llama3.1:8b | ~5 GB | ✅ Fully |
| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
| sematre/orpheus:en | ~4 GB | ✅ Fully |
| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
| deepseek-r1:32b | ~19 GB | ✅ Fully |
| Two 7B models simultaneously | ~10 GB | ✅ Both fit |
ollama list # verify all 5 models
```
### 4. Install WSL2
```powershell
# From PowerShell (Admin)
wsl --install -d Ubuntu-24.04
# Reboot if prompted, then set up username/password
```
---
## 2. Whisper.cpp — Speech-to-Text
## Phase 2: WSL2 Setup
### Option A: Pre-built Binary (Recommended)
### 1. Install Dependencies
Download the latest release from GitHub:
```bash
# Update
sudo apt update && sudo apt upgrade -y
```powershell
# Create whisper directory
mkdir "$env:USERPROFILE\whisper-cpp"
cd "$env:USERPROFILE\whisper-cpp"
# Node.js 20 LTS
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
sudo apt install -y nodejs
# Download latest release (CUDA build)
# Check: https://github.com/ggerganov/whisper.cpp/releases
# Look for: whisper-cublas-bin-x64.zip or whisper-cuda-bin-x64.zip
# Python 3.12
sudo apt install -y python3.12 python3.12-venv python3-pip
# Build tools + ffmpeg
sudo apt install -y ffmpeg git curl build-essential cmake
# Verify
node --version # 20.x+
python3.12 --version
nvidia-smi # should show RTX 5090 (GPU passthrough from Windows)
```
### Option B: Build from Source (CUDA)
> **Important:** Do NOT install NVIDIA drivers inside WSL2. The Windows-side driver handles GPU passthrough automatically.
```powershell
### 2. Clone Repo
```bash
mkdir -p ~/code/mygh && cd ~/code/mygh
git clone https://github.com/saravanakumardb1/learning_ai_common_plat.git
cd learning_ai_common_plat/__LOCAL_LLMs
```
> **Performance note:** Always clone inside WSL2 filesystem (`~/code/...`), NOT in `/mnt/c/` — the Windows filesystem bridge is very slow for `node_modules`.
### 3. Whisper.cpp (CUDA build)
```bash
cd ~
git clone https://github.com/ggerganov/whisper.cpp.git
cd whisper.cpp
cmake -B build -DGGML_CUDA=ON
cmake --build build --config Release
```
cmake --build build --config Release -j$(nproc)
sudo cp build/bin/whisper-cli /usr/local/bin/
### Download Whisper Model
```powershell
mkdir "$env:USERPROFILE\whisper-models"
# Download ggml-large-v3-turbo (1.5 GB)
curl -L -o "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" `
# Download model (1.5 GB)
mkdir -p ~/whisper-models
curl -L -o ~/whisper-models/ggml-large-v3-turbo.bin \
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
# Verify
whisper-cli --version
```
> **No corporate proxy on this machine** — download directly from `huggingface.co`.
> The `hf-mirror.com` workaround is only needed on the corporate MacBook.
### Verify
### 4. TTS Setup (One-Shot)
```powershell
# Test transcription
whisper-cli -m "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" -f test.wav
```bash
cd ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs
# Works exactly like macOS — downloads SNAC, Qwen3-TTS, creates venv
bash setup-tts.sh
```
The script detects macOS vs Linux and installs the correct PyTorch variant (MPS on macOS, CUDA on Linux). On a personal machine, override the default HuggingFace mirror: `HF_MIRROR=https://huggingface.co bash setup-tts.sh`
### 5. Start Dashboard
```bash
bash start-dashboard.sh
# Open http://localhost:3000 in Windows browser
```
WSL2 automatically forwards ports — the dashboard is accessible from Windows at `localhost:3000`.
---
## 3. TTS — Orpheus + Qwen3-TTS
## Key Differences: macOS vs WSL2
### 3a. Orpheus TTS (via Ollama)
Already handled in Step 1 (`ollama pull sematre/orpheus:en`).
### 3b. SNAC Decoder
```powershell
# Create models directory (match macOS layout)
$MODELS = "$PSScriptRoot\models" # or wherever you clone the repo
mkdir "$MODELS\snac_24khz" -Force
# Download SNAC decoder
curl -L -o "$MODELS\snac_24khz\config.json" `
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/config.json"
curl -L -o "$MODELS\snac_24khz\pytorch_model.bin" `
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
```
### 3c. Python Venv + Dependencies
```powershell
cd __LOCAL_LLMs
# Create venv
python -m venv .venv-qwen-tts
# Activate (Windows uses Scripts, not bin)
.\.venv-qwen-tts\Scripts\Activate.ps1
# Install PyTorch with CUDA (NOT MPS — that's Apple only)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
# Install other deps
pip install snac numpy soundfile
# Verify CUDA
python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, Device: {torch.cuda.get_device_name(0)}')"
# Expected: CUDA: True, Device: NVIDIA GeForce RTX 5090 Laptop GPU
```
### 3d. Qwen3-TTS 0.6B
```powershell
$MODELS = ".\models"
# Tokenizer (~650 MB)
mkdir "$MODELS\Qwen3-TTS-Tokenizer-12Hz" -Force
foreach ($f in @("config.json", "configuration.json", "preprocessor_config.json")) {
curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\$f" `
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f"
}
curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\model.safetensors" `
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
# Model weights (~1.8 GB)
mkdir "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice" -Force
foreach ($f in @("config.json", "generation_config.json")) {
curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\$f" `
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f"
}
curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\model.safetensors" `
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
```
### 3e. Test TTS
```powershell
# Activate venv
.\.venv-qwen-tts\Scripts\Activate.ps1
# Orpheus TTS test
python test_orpheus_tts.py
# Qwen3-TTS test
python test_qwen_tts.py
```
> **Key difference from macOS:** Qwen3-TTS will use **CUDA** instead of MPS.
> In `test_qwen_tts.py`, the device selection `torch.device("mps")` will fall through to CUDA automatically
> since `torch.backends.mps.is_available()` returns False on Windows.
> You may want to update the device logic to prefer CUDA:
>
> ```python
> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
> ```
---
## 4. Mission Control Dashboard
```powershell
cd __LOCAL_LLMs\dashboard
# Install dependencies
npm install
# Start dev server
npm run dev
# Open http://localhost:3000
```
The dashboard is pure Next.js — works identically on Windows. The API routes auto-detect:
- **Ollama** at `localhost:11434`
- **Whisper** models in `%USERPROFILE%\whisper-models\`
- **TTS** engines (Orpheus, Qwen3-TTS) and Python venv
### Start Script (PowerShell)
No PowerShell start script exists yet, so run the equivalent steps manually:
```powershell
# Quick start (manual)
ollama serve # if not already running as service
cd __LOCAL_LLMs\dashboard
npm run dev
```
> TODO: Create `start-dashboard.ps1` as a PowerShell equivalent of `start-dashboard.sh`
---
## 5. Key Differences: macOS vs Windows
| Area | macOS (M4 Pro 48 GB) | Windows (Razer Blade 18) |
| ------------------- | ----------------------------------- | ------------------------------------- |
| **GPU** | Apple Silicon (unified memory, MPS) | RTX 5090 (24 GB VRAM, CUDA) |
| **Ollama GPU** | Automatic (Metal) | Automatic (CUDA) |
| **VRAM** | Shared from 48 GB RAM | Dedicated 24 GB GDDR7 |
| **PyTorch device** | `mps` | `cuda` |
| **Whisper install** | `brew install whisper-cpp` | Build from source or download release |
| **Python venv** | `bin/activate` | `Scripts\Activate.ps1` |
| **Package manager** | Homebrew | winget / scoop |
| **Shell** | zsh / bash | PowerShell / cmd |
| **Scripts** | `.sh` (bash) | `.ps1` (PowerShell) |
| **Model download** | `hf-mirror.com` (corporate proxy) | `huggingface.co` (no proxy) |
| **Dashboard** | Identical | Identical |
| **Ollama models** | Identical | Identical |
| Area | macOS (any Mac) | WSL2 (any Linux) |
| ---------------------- | --------------------------- | -------------------------------------- |
| **GPU** | Apple Silicon (MPS) | NVIDIA (CUDA) |
| **Ollama** | macOS native (Metal) | Windows native, accessed via localhost |
| **PyTorch device** | `mps` | `cuda` |
| **Whisper install** | `brew install whisper-cpp` | Build from source with CUDA |
| **Package manager** | Homebrew | apt |
| **Shell scripts** | Work as-is | Work as-is |
| **Python venv path** | `bin/python` | `bin/python` (same) |
| **Dashboard** | Identical | Identical |
| **Ollama models path** | `~/.ollama/models/` | Windows `%USERPROFILE%\.ollama\` |
| **Model download** | `hf-mirror.com` (corporate) | `huggingface.co` (direct) |
### Performance Expectations
| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
| --------------------------- | ---------------------------- | ------------------------- |
| qwen2.5-coder:32b inference | ~15–25 tok/s (MPS/CPU blend) | ~40–60 tok/s (full CUDA)  |
| Whisper large-v3-turbo      | ~2–4x realtime (CPU)         | ~8–15x realtime (CUDA)    |
| Orpheus TTS                 | ~realtime (CPU decode)       | ~2–3x realtime (CUDA)     |
| Qwen3-TTS                   | ~realtime (MPS)              | ~2–4x realtime (CUDA)     |
| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to RAM |
| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
| --------------------------- | -------------------- | ------------------------------- |
| qwen2.5-coder:32b inference | ~15–25 tok/s         | ~40–60 tok/s                    |
| Whisper large-v3-turbo      | ~2–4x realtime       | ~8–15x realtime                 |
| Orpheus TTS                 | ~realtime            | ~2–3x realtime                  |
| Qwen3-TTS                   | ~realtime (MPS)      | ~2–4x realtime (CUDA)           |
| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to 64 GB RAM |
### VRAM Budget (RTX 5090 — 24 GB)
| Model | VRAM Usage | Fits in GPU? |
| ------------------ | ---------- | ------------ |
| llama3.1:8b | ~5 GB | ✅ Fully |
| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
| sematre/orpheus:en | ~4 GB | ✅ Fully |
| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
| deepseek-r1:32b | ~19 GB | ✅ Fully |
---
## 6. File Layout (Same as macOS)
## Quick Reference — Full Setup Checklist
### Windows Side
```
__LOCAL_LLMs/
├── dashboard/ ← Mission Control (port 3000) — works as-is
├── models/ ← TTS model weights (gitignored)
│ ├── snac_24khz/
│ ├── Qwen3-TTS-Tokenizer-12Hz/
│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/
├── .venv-qwen-tts/ ← Python venv (Scripts\ on Windows)
├── test_orpheus_tts.py ← works as-is (device fallback)
├── test_qwen_tts.py ← update device to prefer CUDA
├── windows_specific/
│ ├── razer-blade-18-spec.md ← hardware spec
│ └── setup-guide.md ← this file
└── docs/ ← macOS-focused docs (still useful as reference)
```
---
## 7. Quick Reference — Full Setup Checklist
```
[ ] Install NVIDIA drivers + CUDA Toolkit
[ ] Install NVIDIA drivers (Game Ready or Studio)
[ ] Install Ollama (winget install Ollama.Ollama)
[ ] Pull models: qwen2.5-coder:32b, deepseek-r1:32b, llama3.1:8b, orpheus
[ ] Install Node.js 20+ (winget)
[ ] Install Python 3.12 (winget)
[ ] Install Git (winget)
[ ] Install ffmpeg (winget)
[ ] Clone repo
[ ] Download Whisper model to %USERPROFILE%\whisper-models\
[ ] Build or download whisper-cpp with CUDA
[ ] Create Python venv + install PyTorch CUDA + snac
[ ] Download SNAC decoder
[ ] Download Qwen3-TTS tokenizer + model
[ ] npm install in dashboard/
[ ] Run dashboard: npm run dev
[ ] Pull all 5 models
[ ] Install WSL2 (wsl --install -d Ubuntu-24.04)
```
### WSL2 Side
```
[ ] Install Node.js 20+, Python 3.12, ffmpeg, git, cmake
[ ] Verify nvidia-smi shows RTX 5090
[ ] Clone repo into ~/code/mygh/
[ ] Build whisper-cpp with CUDA
[ ] Download Whisper model to ~/whisper-models/
[ ] Run: bash setup-tts.sh
[ ] Run: bash start-dashboard.sh
[ ] Verify: http://localhost:3000 shows all green
```
---
## Troubleshooting
### Ollama not accessible from WSL2
```bash
curl http://localhost:11434/api/tags
# If this fails, check the Windows firewall or try:
curl http://$(hostname).local:11434/api/tags
```
### CUDA not visible in WSL2
```bash
nvidia-smi
# If "command not found":
# 1. Update Windows NVIDIA drivers to latest
# 2. Run: wsl --update
# 3. Do NOT install nvidia-driver-* inside WSL2
```
### Slow filesystem performance
```bash
# Clone repos inside WSL2 filesystem: ~/code/...
# NOT in /mnt/c/ (Windows→WSL bridge is ~10x slower for node_modules)
```