fix(local-llms): cross-platform audit — 8 bugs/gaps fixed
- setup-tts.sh: make fully cross-platform (macOS + Linux/WSL2)
  - OS detection, apt fallback, CUDA PyTorch install, nvidia-smi check
  - cross-platform playback hints, HF_MIRROR env override
- api/system/route.ts: fix ffmpeg detection (use -version not --version)
- api/system/memory/route.ts: remove unused total variable in Linux path
- api/system/exec/route.ts: expand allowlist with Linux commands (head, tail, grep, which, ps, uname, free, lscpu, nvidia-smi, etc.)
- api/tts/route.ts: cross-platform venv path + CUDA/MPS label
- api/whisper/route.ts: Linux binary/model paths
- api/ollama/logs/route.ts: Linux log paths + WSL2 hint
- test_qwen_tts.py: platform-aware speech text + CUDA device detection
- test_orpheus_tts.py: platform-aware text, move import sys to top
- setup-guide.md: fix false auto-detect claim, add HF_MIRROR hint
This commit is contained in:
parent
f85b455eb5
commit
b1d2e4ec81
@ -4,11 +4,15 @@ import { homedir } from 'os';
|
|||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
import { existsSync } from 'fs';
|
import { existsSync } from 'fs';
|
||||||
|
|
||||||
|
const IS_MAC = process.platform === 'darwin';
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
const logPaths = [
|
const logPaths = [
|
||||||
join(homedir(), '.ollama', 'logs', 'server.log'),
|
join(homedir(), '.ollama', 'logs', 'server.log'),
|
||||||
join(homedir(), '.ollama', 'logs', 'gpu.log'),
|
join(homedir(), '.ollama', 'logs', 'gpu.log'),
|
||||||
'/tmp/ollama.log',
|
'/tmp/ollama.log',
|
||||||
|
// Linux / WSL2 — journalctl may write here
|
||||||
|
'/var/log/ollama.log',
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const logPath of logPaths) {
|
for (const logPath of logPaths) {
|
||||||
@ -25,11 +29,13 @@ export async function GET() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// On macOS, Ollama logs via unified logging
|
// Fallback: platform-specific logging hint
|
||||||
|
const hint = IS_MAC
|
||||||
|
? 'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m'
|
||||||
|
: 'Ollama logs not found. If running on Windows (accessed via WSL2), check Windows Event Viewer or: journalctl -u ollama --no-pager -n 50';
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
lines: [
|
lines: [hint],
|
||||||
'Ollama uses macOS unified logging. Use: log show --predicate \'subsystem == "com.ollama"\' --last 5m',
|
|
||||||
],
|
|
||||||
path: 'system',
|
path: 'system',
|
||||||
total: 1,
|
total: 1,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -5,9 +5,9 @@ import { promisify } from 'util';
|
|||||||
const execFileAsync = promisify(execFile);
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
const COMMAND_ALLOWLIST = new Set([
|
const COMMAND_ALLOWLIST = new Set([
|
||||||
|
// Cross-platform (macOS + Linux)
|
||||||
'git',
|
'git',
|
||||||
'npm',
|
'npm',
|
||||||
'brew',
|
|
||||||
'cat',
|
'cat',
|
||||||
'ls',
|
'ls',
|
||||||
'wc',
|
'wc',
|
||||||
@ -15,6 +15,21 @@ const COMMAND_ALLOWLIST = new Set([
|
|||||||
'df',
|
'df',
|
||||||
'echo',
|
'echo',
|
||||||
'date',
|
'date',
|
||||||
|
'head',
|
||||||
|
'tail',
|
||||||
|
'grep',
|
||||||
|
'which',
|
||||||
|
'ps',
|
||||||
|
'uname',
|
||||||
|
'whoami',
|
||||||
|
// macOS
|
||||||
|
'brew',
|
||||||
|
// Linux / WSL2
|
||||||
|
'free',
|
||||||
|
'lscpu',
|
||||||
|
'nvidia-smi',
|
||||||
|
'dpkg',
|
||||||
|
'apt',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
export async function POST(request: NextRequest) {
|
export async function POST(request: NextRequest) {
|
||||||
|
|||||||
@ -1,10 +1,13 @@
|
|||||||
import { NextResponse } from 'next/server';
|
import { NextResponse } from 'next/server';
|
||||||
import { exec } from 'child_process';
|
import { exec } from 'child_process';
|
||||||
import { promisify } from 'util';
|
import { promisify } from 'util';
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
|
const IS_MAC = process.platform === 'darwin';
|
||||||
|
|
||||||
interface ProcessInfo {
|
interface ProcessInfo {
|
||||||
pid: number;
|
pid: number;
|
||||||
name: string;
|
name: string;
|
||||||
@ -26,7 +29,7 @@ interface VmStatBreakdown {
|
|||||||
|
|
||||||
async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
|
async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
|
||||||
try {
|
try {
|
||||||
// ps with RSS in KB, sorted descending by RSS
|
// ps with RSS in KB, sorted descending by RSS — works on both macOS and Linux
|
||||||
const { stdout } = await execAsync(
|
const { stdout } = await execAsync(
|
||||||
`ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -${limit}`,
|
`ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -${limit}`,
|
||||||
{ timeout: 3000 }
|
{ timeout: 3000 }
|
||||||
@ -60,36 +63,67 @@ async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
|
async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
|
||||||
try {
|
if (IS_MAC) {
|
||||||
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
try {
|
||||||
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
|
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
||||||
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
|
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
|
||||||
const parse = (label: string): number => {
|
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
|
||||||
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
const parse = (label: string): number => {
|
||||||
return match ? parseInt(match[1]) * pageSize : 0;
|
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
||||||
};
|
return match ? parseInt(match[1]) * pageSize : 0;
|
||||||
return {
|
};
|
||||||
active: parse('Pages active'),
|
return {
|
||||||
wired: parse('Pages wired down'),
|
active: parse('Pages active'),
|
||||||
compressor: parse('Pages occupied by compressor'),
|
wired: parse('Pages wired down'),
|
||||||
inactive: parse('Pages inactive'),
|
compressor: parse('Pages occupied by compressor'),
|
||||||
purgeable: parse('Pages purgeable'),
|
inactive: parse('Pages inactive'),
|
||||||
speculative: parse('Pages speculative'),
|
purgeable: parse('Pages purgeable'),
|
||||||
free: parse('Pages free'),
|
speculative: parse('Pages speculative'),
|
||||||
pageSize,
|
free: parse('Pages free'),
|
||||||
};
|
pageSize,
|
||||||
} catch {
|
};
|
||||||
return {
|
} catch {
|
||||||
active: 0,
|
// fall through to zeros
|
||||||
wired: 0,
|
}
|
||||||
compressor: 0,
|
} else {
|
||||||
inactive: 0,
|
// Linux / WSL2 — parse /proc/meminfo into vm_stat-compatible structure
|
||||||
purgeable: 0,
|
try {
|
||||||
speculative: 0,
|
const raw = await readFile('/proc/meminfo', 'utf-8');
|
||||||
free: 0,
|
const parse = (key: string): number => {
|
||||||
pageSize: 16384,
|
const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
|
||||||
};
|
return match ? parseInt(match[1]) * 1024 : 0;
|
||||||
|
};
|
||||||
|
const free = parse('MemFree');
|
||||||
|
const buffers = parse('Buffers');
|
||||||
|
const cached = parse('Cached');
|
||||||
|
const sReclaimable = parse('SReclaimable');
|
||||||
|
const active = parse('Active');
|
||||||
|
|
||||||
|
return {
|
||||||
|
active,
|
||||||
|
wired: buffers, // closest analogy
|
||||||
|
compressor: parse('SwapCached'),
|
||||||
|
inactive: parse('Inactive'),
|
||||||
|
purgeable: sReclaimable,
|
||||||
|
speculative: 0,
|
||||||
|
free,
|
||||||
|
pageSize: 4096,
|
||||||
|
};
|
||||||
|
} catch {
|
||||||
|
// fall through to zeros
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
active: 0,
|
||||||
|
wired: 0,
|
||||||
|
compressor: 0,
|
||||||
|
inactive: 0,
|
||||||
|
purgeable: 0,
|
||||||
|
speculative: 0,
|
||||||
|
free: 0,
|
||||||
|
pageSize: IS_MAC ? 16384 : 4096,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
|
|||||||
@ -1,11 +1,14 @@
|
|||||||
import { NextResponse } from 'next/server';
|
import { NextResponse } from 'next/server';
|
||||||
import { exec, execFile } from 'child_process';
|
import { exec, execFile } from 'child_process';
|
||||||
import { promisify } from 'util';
|
import { promisify } from 'util';
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
const execFileAsync = promisify(execFile);
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
const IS_MAC = process.platform === 'darwin';
|
||||||
|
|
||||||
// Cache slow commands with TTL
|
// Cache slow commands with TTL
|
||||||
let staticCache: {
|
let staticCache: {
|
||||||
chip: string;
|
chip: string;
|
||||||
@ -20,10 +23,17 @@ const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
|
|||||||
|
|
||||||
async function getChipInfo(): Promise<string> {
|
async function getChipInfo(): Promise<string> {
|
||||||
try {
|
try {
|
||||||
|
if (IS_MAC) {
|
||||||
|
const { stdout } = await execAsync(
|
||||||
|
"sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
|
||||||
|
);
|
||||||
|
return stdout.trim();
|
||||||
|
}
|
||||||
|
// Linux / WSL2
|
||||||
const { stdout } = await execAsync(
|
const { stdout } = await execAsync(
|
||||||
"sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown'"
|
"lscpu 2>/dev/null | grep 'Model name' | sed 's/.*:\\s*//' || cat /proc/cpuinfo | grep 'model name' | head -1 | sed 's/.*: //'"
|
||||||
);
|
);
|
||||||
return stdout.trim();
|
return stdout.trim() || 'Unknown';
|
||||||
} catch {
|
} catch {
|
||||||
return 'Unknown';
|
return 'Unknown';
|
||||||
}
|
}
|
||||||
@ -55,30 +65,63 @@ async function getOllamaModelsDiskUsage(): Promise<number> {
|
|||||||
|
|
||||||
async function getGpuInfo(): Promise<string> {
|
async function getGpuInfo(): Promise<string> {
|
||||||
try {
|
try {
|
||||||
|
if (IS_MAC) {
|
||||||
|
const { stdout } = await execAsync(
|
||||||
|
"system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
|
||||||
|
{ timeout: 5000 }
|
||||||
|
);
|
||||||
|
return stdout.trim() || 'Apple Silicon (integrated)';
|
||||||
|
}
|
||||||
|
// Linux / WSL2 — try nvidia-smi first, fall back to lspci
|
||||||
|
try {
|
||||||
|
const { stdout } = await execAsync(
|
||||||
|
'nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1',
|
||||||
|
{ timeout: 3000 }
|
||||||
|
);
|
||||||
|
if (stdout.trim()) return stdout.trim();
|
||||||
|
} catch {
|
||||||
|
/* no nvidia-smi */
|
||||||
|
}
|
||||||
const { stdout } = await execAsync(
|
const { stdout } = await execAsync(
|
||||||
"system_profiler SPDisplaysDataType 2>/dev/null | grep 'Chipset Model' | sed 's/.*: //'",
|
"lspci 2>/dev/null | grep -i 'vga\\|3d' | sed 's/.*: //' | head -1"
|
||||||
{ timeout: 5000 }
|
|
||||||
);
|
);
|
||||||
return stdout.trim() || 'Apple Silicon (integrated)';
|
return stdout.trim() || 'Unknown';
|
||||||
} catch {
|
} catch {
|
||||||
return 'Apple Silicon (integrated)';
|
return IS_MAC ? 'Apple Silicon (integrated)' : 'Unknown';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getBrewPackages(): Promise<Array<{ name: string; version: string }>> {
|
async function getBrewPackages(): Promise<Array<{ name: string; version: string }>> {
|
||||||
const targets = ['ollama', 'whisper-cpp', 'ffmpeg'];
|
const targets = ['ollama', 'whisper-cpp', 'ffmpeg'];
|
||||||
const results: Array<{ name: string; version: string }> = [];
|
const results: Array<{ name: string; version: string }> = [];
|
||||||
for (const pkg of targets) {
|
|
||||||
try {
|
if (IS_MAC) {
|
||||||
const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
|
for (const pkg of targets) {
|
||||||
timeout: 3000,
|
try {
|
||||||
});
|
const { stdout } = await execFileAsync('brew', ['list', '--versions', pkg], {
|
||||||
const parts = stdout.trim().split(' ');
|
timeout: 3000,
|
||||||
if (parts.length >= 2) {
|
});
|
||||||
results.push({ name: parts[0], version: parts.slice(1).join(' ') });
|
const parts = stdout.trim().split(' ');
|
||||||
|
if (parts.length >= 2) {
|
||||||
|
results.push({ name: parts[0], version: parts.slice(1).join(' ') });
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// not installed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Linux / WSL2 — check via version commands (ffmpeg uses -version, others use --version)
|
||||||
|
for (const pkg of targets) {
|
||||||
|
const bin = pkg === 'whisper-cpp' ? 'whisper-cli' : pkg;
|
||||||
|
const flag = bin === 'ffmpeg' ? '-version' : '--version';
|
||||||
|
try {
|
||||||
|
const { stdout } = await execAsync(`${bin} ${flag} 2>&1 | head -1`, { timeout: 3000 });
|
||||||
|
if (stdout.trim()) {
|
||||||
|
results.push({ name: pkg, version: stdout.trim() });
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// not installed
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// not installed
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
@ -106,7 +149,8 @@ async function getCachedOllamaDiskUsage(): Promise<number> {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
|
// macOS: vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
|
||||||
|
// Linux: /proc/meminfo gives accurate breakdown
|
||||||
async function getAccurateMemory(): Promise<{
|
async function getAccurateMemory(): Promise<{
|
||||||
total: number;
|
total: number;
|
||||||
appMemory: number;
|
appMemory: number;
|
||||||
@ -115,42 +159,66 @@ async function getAccurateMemory(): Promise<{
|
|||||||
pressure: string;
|
pressure: string;
|
||||||
}> {
|
}> {
|
||||||
const totalMem = os.totalmem();
|
const totalMem = os.totalmem();
|
||||||
try {
|
|
||||||
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
|
||||||
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
|
|
||||||
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
|
|
||||||
const parse = (label: string): number => {
|
|
||||||
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
|
||||||
return match ? parseInt(match[1]) * pageSize : 0;
|
|
||||||
};
|
|
||||||
const active = parse('Pages active');
|
|
||||||
const wired = parse('Pages wired down');
|
|
||||||
const inactive = parse('Pages inactive');
|
|
||||||
const purgeable = parse('Pages purgeable');
|
|
||||||
const speculative = parse('Pages speculative');
|
|
||||||
const free = parse('Pages free');
|
|
||||||
const compressor = parse('Pages occupied by compressor');
|
|
||||||
|
|
||||||
const appMemory = active + wired + compressor;
|
if (IS_MAC) {
|
||||||
const cached = inactive + purgeable + speculative;
|
try {
|
||||||
// Return raw free separately from cached — no overlap
|
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
||||||
// available for loading = free + cached (macOS reclaims cached on demand)
|
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
|
||||||
|
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
|
||||||
|
const parse = (label: string): number => {
|
||||||
|
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
||||||
|
return match ? parseInt(match[1]) * pageSize : 0;
|
||||||
|
};
|
||||||
|
const active = parse('Pages active');
|
||||||
|
const wired = parse('Pages wired down');
|
||||||
|
const inactive = parse('Pages inactive');
|
||||||
|
const purgeable = parse('Pages purgeable');
|
||||||
|
const speculative = parse('Pages speculative');
|
||||||
|
const free = parse('Pages free');
|
||||||
|
const compressor = parse('Pages occupied by compressor');
|
||||||
|
|
||||||
const ratio = appMemory / totalMem;
|
const appMemory = active + wired + compressor;
|
||||||
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
|
const cached = inactive + purgeable + speculative;
|
||||||
|
|
||||||
return { total: totalMem, appMemory, cached, free, pressure };
|
const ratio = appMemory / totalMem;
|
||||||
} catch {
|
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
|
||||||
// Fallback to Node.js (inaccurate on macOS but works everywhere)
|
|
||||||
const freeMem = os.freemem();
|
return { total: totalMem, appMemory, cached, free, pressure };
|
||||||
return {
|
} catch {
|
||||||
total: totalMem,
|
// fall through to generic fallback
|
||||||
appMemory: totalMem - freeMem,
|
}
|
||||||
cached: 0,
|
} else {
|
||||||
free: freeMem,
|
// Linux / WSL2 — parse /proc/meminfo
|
||||||
pressure: 'unknown',
|
try {
|
||||||
};
|
const raw = await readFile('/proc/meminfo', 'utf-8');
|
||||||
|
const parse = (key: string): number => {
|
||||||
|
const match = raw.match(new RegExp(`${key}:\\s+(\\d+)`));
|
||||||
|
return match ? parseInt(match[1]) * 1024 : 0; // /proc/meminfo is in kB
|
||||||
|
};
|
||||||
|
const total = parse('MemTotal');
|
||||||
|
const free = parse('MemFree');
|
||||||
|
const buffers = parse('Buffers');
|
||||||
|
const cached = parse('Cached') + parse('SReclaimable') + buffers;
|
||||||
|
const appMemory = total - free - cached;
|
||||||
|
|
||||||
|
const ratio = appMemory / total;
|
||||||
|
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
|
||||||
|
|
||||||
|
return { total, appMemory, cached, free, pressure };
|
||||||
|
} catch {
|
||||||
|
// fall through to generic fallback
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic fallback (works everywhere but less accurate)
|
||||||
|
const freeMem = os.freemem();
|
||||||
|
return {
|
||||||
|
total: totalMem,
|
||||||
|
appMemory: totalMem - freeMem,
|
||||||
|
cached: 0,
|
||||||
|
free: freeMem,
|
||||||
|
pressure: 'unknown',
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
|
|||||||
@ -6,9 +6,14 @@ import { join, resolve } from 'path';
|
|||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
|
const IS_MAC = process.platform === 'darwin';
|
||||||
|
|
||||||
// process.cwd() = dashboard/, parent = __LOCAL_LLMs/
|
// process.cwd() = dashboard/, parent = __LOCAL_LLMs/
|
||||||
const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');
|
const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');
|
||||||
|
|
||||||
|
// macOS/Linux: bin/python, Windows native: Scripts/python.exe
|
||||||
|
const VENV_PYTHON = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
|
||||||
|
|
||||||
interface TtsEngine {
|
interface TtsEngine {
|
||||||
name: string;
|
name: string;
|
||||||
type: 'ollama' | 'python';
|
type: 'ollama' | 'python';
|
||||||
@ -67,8 +72,7 @@ async function checkOrpheus(): Promise<TtsEngine> {
|
|||||||
const snacSize = hasSnac ? await getFileSize(snacPath) : 0;
|
const snacSize = hasSnac ? await getFileSize(snacPath) : 0;
|
||||||
|
|
||||||
// Check Python venv
|
// Check Python venv
|
||||||
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
|
const hasVenv = await fileExists(VENV_PYTHON);
|
||||||
const hasVenv = await fileExists(venvPython);
|
|
||||||
|
|
||||||
if (hasModel && hasSnac && hasVenv) {
|
if (hasModel && hasSnac && hasVenv) {
|
||||||
engine.status = 'ready';
|
engine.status = 'ready';
|
||||||
@ -114,13 +118,14 @@ async function checkQwenTts(): Promise<TtsEngine> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const hasTokenizer = await fileExists(join(tokenizerDir, 'config.json'));
|
const hasTokenizer = await fileExists(join(tokenizerDir, 'config.json'));
|
||||||
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
|
const hasVenv = await fileExists(VENV_PYTHON);
|
||||||
const hasVenv = await fileExists(venvPython);
|
|
||||||
|
|
||||||
if (hasModel && hasTokenizer && hasVenv) {
|
if (hasModel && hasTokenizer && hasVenv) {
|
||||||
engine.status = 'ready';
|
engine.status = 'ready';
|
||||||
engine.size = `${(modelSize / 1e9).toFixed(1)} GB`;
|
engine.size = `${(modelSize / 1e9).toFixed(1)} GB`;
|
||||||
engine.details = '0.6B params · 10 languages · MPS/CPU';
|
engine.details = IS_MAC
|
||||||
|
? '0.6B params · 10 languages · MPS/CPU'
|
||||||
|
: '0.6B params · 10 languages · CUDA/CPU';
|
||||||
} else if (hasModel || hasTokenizer) {
|
} else if (hasModel || hasTokenizer) {
|
||||||
engine.status = 'partial';
|
engine.status = 'partial';
|
||||||
const missing: string[] = [];
|
const missing: string[] = [];
|
||||||
@ -141,22 +146,21 @@ async function checkVenv(): Promise<{
|
|||||||
python?: string;
|
python?: string;
|
||||||
packages?: string[];
|
packages?: string[];
|
||||||
}> {
|
}> {
|
||||||
const venvPython = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
|
const exists = await fileExists(VENV_PYTHON);
|
||||||
const exists = await fileExists(venvPython);
|
|
||||||
if (!exists) return { exists: false };
|
if (!exists) return { exists: false };
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { stdout } = await execAsync(
|
const { stdout } = await execAsync(
|
||||||
`"${venvPython}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
|
`"${VENV_PYTHON}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`,
|
||||||
{ timeout: 5000 }
|
{ timeout: 5000 }
|
||||||
);
|
);
|
||||||
return {
|
return {
|
||||||
exists: true,
|
exists: true,
|
||||||
python: venvPython,
|
python: VENV_PYTHON,
|
||||||
packages: stdout.trim().split(' '),
|
packages: stdout.trim().split(' '),
|
||||||
};
|
};
|
||||||
} catch {
|
} catch {
|
||||||
return { exists: true, python: venvPython };
|
return { exists: true, python: VENV_PYTHON };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -7,9 +7,22 @@ import { homedir } from 'os';
|
|||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
|
const IS_MAC = process.platform === 'darwin';
|
||||||
|
|
||||||
async function getWhisperBinaries(): Promise<string[]> {
|
async function getWhisperBinaries(): Promise<string[]> {
|
||||||
try {
|
try {
|
||||||
const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
|
if (IS_MAC) {
|
||||||
|
const { stdout } = await execAsync('ls /opt/homebrew/bin/whisper-* 2>/dev/null');
|
||||||
|
return stdout
|
||||||
|
.trim()
|
||||||
|
.split('\n')
|
||||||
|
.filter(Boolean)
|
||||||
|
.map(p => p.split('/').pop() || p);
|
||||||
|
}
|
||||||
|
// Linux / WSL2 — check common locations
|
||||||
|
const { stdout } = await execAsync(
|
||||||
|
'ls /usr/local/bin/whisper-* /usr/bin/whisper-* 2>/dev/null || which whisper-cli 2>/dev/null'
|
||||||
|
);
|
||||||
return stdout
|
return stdout
|
||||||
.trim()
|
.trim()
|
||||||
.split('\n')
|
.split('\n')
|
||||||
@ -25,7 +38,9 @@ const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '')
|
|||||||
.filter(Boolean)
|
.filter(Boolean)
|
||||||
.concat([
|
.concat([
|
||||||
join(homedir(), 'whisper-models'),
|
join(homedir(), 'whisper-models'),
|
||||||
'/opt/homebrew/share/whisper-cpp/models',
|
...(IS_MAC
|
||||||
|
? ['/opt/homebrew/share/whisper-cpp/models']
|
||||||
|
: ['/usr/local/share/whisper-cpp/models', '/usr/share/whisper-cpp/models']),
|
||||||
join(homedir(), '.cache', 'whisper'),
|
join(homedir(), '.cache', 'whisper'),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|||||||
@ -3,26 +3,24 @@
|
|||||||
# TTS Setup — One-Shot Script for Fresh Laptop
|
# TTS Setup — One-Shot Script for Fresh Laptop
|
||||||
#
|
#
|
||||||
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
|
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
|
||||||
# on Apple Silicon Macs. Works through corporate proxy.
|
# on macOS (Apple Silicon) or Linux (CUDA GPU / WSL2).
|
||||||
#
|
#
|
||||||
# What this does:
|
# What this does:
|
||||||
# 1. Installs Python 3.12 via Homebrew (if missing)
|
# 1. Installs Python 3.12 (Homebrew on macOS, apt on Linux)
|
||||||
# 2. Creates Python venv with TTS packages
|
# 2. Creates Python venv with TTS packages (MPS on macOS, CUDA on Linux)
|
||||||
# 3. Pulls Orpheus TTS model via Ollama
|
# 3. Pulls Orpheus TTS model via Ollama
|
||||||
# 4. Downloads SNAC audio decoder via hf-mirror.com
|
# 4. Downloads SNAC audio decoder
|
||||||
# 5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
|
# 5. (Optional) Downloads Qwen3-TTS 0.6B
|
||||||
#
|
#
|
||||||
# Prerequisites:
|
# Prerequisites:
|
||||||
# - macOS with Apple Silicon (M1/M2/M3/M4)
|
# macOS: Homebrew + Ollama installed
|
||||||
# - Homebrew installed
|
# Linux: apt + Ollama accessible at localhost:11434
|
||||||
# - Ollama installed (brew install ollama)
|
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# bash setup-tts.sh
|
# bash setup-tts.sh
|
||||||
#
|
#
|
||||||
# After setup, test with:
|
# After setup, test with:
|
||||||
# .venv-qwen-tts/bin/python test_orpheus_tts.py
|
# .venv-qwen-tts/bin/python test_orpheus_tts.py
|
||||||
# afplay test_orpheus_tara.wav
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
@ -31,7 +29,13 @@ VENV="$SCRIPT_DIR/.venv-qwen-tts"
|
|||||||
MODELS_DIR="$SCRIPT_DIR/models"
|
MODELS_DIR="$SCRIPT_DIR/models"
|
||||||
|
|
||||||
# HuggingFace mirror that works through corporate proxy
|
# HuggingFace mirror that works through corporate proxy
|
||||||
HF_MIRROR="https://hf-mirror.com"
|
# On personal machines, set HF_MIRROR=https://huggingface.co to download directly
|
||||||
|
HF_MIRROR="${HF_MIRROR:-https://hf-mirror.com}"
|
||||||
|
|
||||||
|
# Detect OS
|
||||||
|
OS_TYPE="$(uname -s)"
|
||||||
|
IS_MAC=false
|
||||||
|
[ "$OS_TYPE" = "Darwin" ] && IS_MAC=true
|
||||||
|
|
||||||
RED='\033[0;31m'
|
RED='\033[0;31m'
|
||||||
GREEN='\033[0;32m'
|
GREEN='\033[0;32m'
|
||||||
@ -52,34 +56,58 @@ echo ""
|
|||||||
# ── 0. Check prerequisites ──────────────────────────────────
|
# ── 0. Check prerequisites ──────────────────────────────────
|
||||||
step "Checking prerequisites"
|
step "Checking prerequisites"
|
||||||
|
|
||||||
# Homebrew
|
if $IS_MAC; then
|
||||||
if ! command -v brew &>/dev/null; then
|
# Homebrew
|
||||||
fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
|
if ! command -v brew &>/dev/null; then
|
||||||
fi
|
fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
|
||||||
ok "Homebrew"
|
fi
|
||||||
|
ok "Homebrew"
|
||||||
|
|
||||||
# Ollama
|
# Ollama (install via Homebrew if missing)
|
||||||
if ! command -v ollama &>/dev/null; then
|
if ! command -v ollama &>/dev/null; then
|
||||||
warn "Ollama not found. Installing..."
|
warn "Ollama not found. Installing..."
|
||||||
brew install ollama
|
brew install ollama
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# Linux / WSL2 — Ollama should be installed on host or via install script
|
||||||
|
if ! command -v ollama &>/dev/null; then
|
||||||
|
# On WSL2 Ollama runs on the Windows side; check if reachable
|
||||||
|
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
|
||||||
|
fail "Ollama not found and not reachable at localhost:11434. Install Ollama on Windows or run: curl -fsSL https://ollama.com/install.sh | sh"
|
||||||
|
fi
|
||||||
|
ok "Ollama reachable at localhost:11434 (Windows host)"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
ok "Ollama installed"
|
ok "Ollama installed"
|
||||||
|
|
||||||
# Check if Ollama is running
|
# Check if Ollama is running
|
||||||
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
|
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
|
||||||
warn "Ollama not running. Starting..."
|
warn "Ollama not running. Starting..."
|
||||||
ollama serve &>/dev/null &
|
if command -v ollama &>/dev/null; then
|
||||||
sleep 3
|
ollama serve &>/dev/null &
|
||||||
|
sleep 3
|
||||||
|
fi
|
||||||
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
|
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
|
||||||
fail "Could not start Ollama. Try manually: ollama serve"
|
fail "Could not start Ollama. Try manually: ollama serve"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
ok "Ollama running on port 11434"
|
ok "Ollama running on port 11434"
|
||||||
|
|
||||||
# Apple Silicon check
|
# GPU check
|
||||||
ARCH=$(uname -m)
|
ARCH=$(uname -m)
|
||||||
if [ "$ARCH" != "arm64" ]; then
|
if $IS_MAC; then
|
||||||
warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
|
if [ "$ARCH" != "arm64" ]; then
|
||||||
|
warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
|
||||||
|
else
|
||||||
|
ok "Apple Silicon ($ARCH) — MPS acceleration available"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if command -v nvidia-smi &>/dev/null; then
|
||||||
|
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
|
||||||
|
ok "NVIDIA GPU detected: $GPU_NAME — CUDA acceleration available"
|
||||||
|
else
|
||||||
|
warn "nvidia-smi not found. CUDA acceleration won't be available (CPU fallback)."
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── 1. Install Python 3.12 ──────────────────────────────────
|
# ── 1. Install Python 3.12 ──────────────────────────────────
|
||||||
@ -87,7 +115,7 @@ step "Python 3.12"
|
|||||||
|
|
||||||
PYTHON_CMD=""
|
PYTHON_CMD=""
|
||||||
# Check various Python 3.12 locations
|
# Check various Python 3.12 locations
|
||||||
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
|
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12 python3; do
|
||||||
if command -v "$cmd" &>/dev/null; then
|
if command -v "$cmd" &>/dev/null; then
|
||||||
PYTHON_CMD="$cmd"
|
PYTHON_CMD="$cmd"
|
||||||
break
|
break
|
||||||
@ -95,9 +123,15 @@ for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
if [ -z "$PYTHON_CMD" ]; then
|
if [ -z "$PYTHON_CMD" ]; then
|
||||||
warn "Python 3.12 not found. Installing via Homebrew..."
|
if $IS_MAC; then
|
||||||
brew install python@3.12
|
warn "Python 3.12 not found. Installing via Homebrew..."
|
||||||
PYTHON_CMD="/opt/homebrew/bin/python3.12"
|
brew install python@3.12
|
||||||
|
PYTHON_CMD="/opt/homebrew/bin/python3.12"
|
||||||
|
else
|
||||||
|
warn "Python 3.12 not found. Installing via apt..."
|
||||||
|
sudo apt update && sudo apt install -y python3.12 python3.12-venv python3-pip
|
||||||
|
PYTHON_CMD="python3.12"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
|
PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
|
||||||
@ -123,7 +157,15 @@ if "$VENV/bin/python" -c "import snac" &>/dev/null; then
|
|||||||
else
|
else
|
||||||
echo "Installing packages (this may take a few minutes)..."
|
echo "Installing packages (this may take a few minutes)..."
|
||||||
"$VENV/bin/pip" install -U pip --quiet
|
"$VENV/bin/pip" install -U pip --quiet
|
||||||
"$VENV/bin/pip" install -U snac qwen-tts --quiet
|
if $IS_MAC; then
|
||||||
|
# macOS: default PyTorch includes MPS support
|
||||||
|
"$VENV/bin/pip" install -U snac qwen-tts --quiet
|
||||||
|
else
|
||||||
|
# Linux: install PyTorch with CUDA first, then snac/qwen-tts
|
||||||
|
echo "Installing PyTorch with CUDA support..."
|
||||||
|
"$VENV/bin/pip" install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --quiet
|
||||||
|
"$VENV/bin/pip" install -U snac qwen-tts --quiet
|
||||||
|
fi
|
||||||
ok "Packages installed"
|
ok "Packages installed"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -144,7 +186,11 @@ step "SNAC 24kHz audio decoder (~76 MB)"
|
|||||||
mkdir -p "$MODELS_DIR/snac_24khz"
|
mkdir -p "$MODELS_DIR/snac_24khz"
|
||||||
|
|
||||||
if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
|
if [ -f "$MODELS_DIR/snac_24khz/pytorch_model.bin" ]; then
|
||||||
SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null || stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
|
if $IS_MAC; then
|
||||||
|
SIZE=$(stat -f%z "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
|
||||||
|
else
|
||||||
|
SIZE=$(stat -c%s "$MODELS_DIR/snac_24khz/pytorch_model.bin" 2>/dev/null)
|
||||||
|
fi
|
||||||
if [ "$SIZE" -gt 1000000 ]; then
|
if [ "$SIZE" -gt 1000000 ]; then
|
||||||
ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
|
ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
|
||||||
else
|
else
|
||||||
@ -247,7 +293,11 @@ du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
|
|||||||
echo ""
|
echo ""
|
||||||
echo "Test commands:"
|
echo "Test commands:"
|
||||||
echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
|
echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
|
||||||
echo " afplay test_orpheus_tara.wav"
|
if $IS_MAC; then
|
||||||
|
echo " afplay test_orpheus_tara.wav"
|
||||||
|
else
|
||||||
|
echo " aplay test_orpheus_tara.wav (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)"
|
||||||
|
fi
|
||||||
if [ -d "$QWEN_MODEL_DIR" ]; then
|
if [ -d "$QWEN_MODEL_DIR" ]; then
|
||||||
echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
|
echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -13,6 +13,7 @@ Usage:
|
|||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import struct
|
import struct
|
||||||
@ -166,7 +167,7 @@ def main():
|
|||||||
|
|
||||||
# Voices: tara, leah, jess, leo, dan, mia, zac, zoe
|
# Voices: tara, leah, jess, leo, dan, mia, zac, zoe
|
||||||
tests = [
|
tests = [
|
||||||
("Hello! This is Orpheus text to speech, running entirely on your Mac through Ollama.", "tara"),
|
("Hello! This is Orpheus text to speech, running entirely locally through Ollama.", "tara"),
|
||||||
("<laugh> That's amazing! Local AI speech generation without any cloud services!", "leo"),
|
("<laugh> That's amazing! Local AI speech generation without any cloud services!", "leo"),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -182,7 +183,10 @@ def main():
|
|||||||
save_wav(audio, sr, outpath)
|
save_wav(audio, sr, outpath)
|
||||||
|
|
||||||
print("\n=== Done! Open the .wav files to listen. ===")
|
print("\n=== Done! Open the .wav files to listen. ===")
|
||||||
print("Play with: afplay test_orpheus_tara.wav")
|
if sys.platform == "darwin":
|
||||||
|
print("Play with: afplay test_orpheus_tara.wav")
|
||||||
|
else:
|
||||||
|
print("Play with: aplay test_orpheus_tara.wav (or: ffplay -nodisp -autoexit test_orpheus_tara.wav)")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Test Qwen3-TTS 0.6B on Apple Silicon (MPS or CPU fallback).
|
Test Qwen3-TTS 0.6B (CUDA, MPS, or CPU fallback).
|
||||||
|
|
||||||
Prerequisites:
|
Prerequisites:
|
||||||
bash setup-tts.sh (one-shot: installs everything)
|
bash setup-tts.sh (one-shot: installs everything)
|
||||||
@ -24,8 +24,12 @@ if not os.path.isdir(MODEL_PATH):
|
|||||||
print("Run: bash setup-tts.sh (or: bash download-tts-models.sh qwen)")
|
print("Run: bash setup-tts.sh (or: bash download-tts-models.sh qwen)")
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
# Pick device: MPS if available, else CPU
|
# Pick device: CUDA > MPS > CPU
|
||||||
if torch.backends.mps.is_available():
|
if torch.cuda.is_available():
|
||||||
|
device = "cuda"
|
||||||
|
dtype = torch.float16
|
||||||
|
print(f"Using CUDA ({torch.cuda.get_device_name(0)})")
|
||||||
|
elif torch.backends.mps.is_available():
|
||||||
device = "mps"
|
device = "mps"
|
||||||
dtype = torch.float32 # MPS doesn't support bfloat16
|
dtype = torch.float32 # MPS doesn't support bfloat16
|
||||||
print(f"Using MPS (Apple Metal GPU)")
|
print(f"Using MPS (Apple Metal GPU)")
|
||||||
@ -48,7 +52,7 @@ print(f"Supported speakers: {model.get_supported_speakers()}")
|
|||||||
print(f"Supported languages: {model.get_supported_languages()}")
|
print(f"Supported languages: {model.get_supported_languages()}")
|
||||||
|
|
||||||
# Test 1: English with a built-in speaker
|
# Test 1: English with a built-in speaker
|
||||||
text = "Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your Mac."
|
text = f"Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your {'Mac' if device == 'mps' else 'machine'} using {device.upper()}."
|
||||||
print(f"\nGenerating speech for: {text[:60]}...")
|
print(f"\nGenerating speech for: {text[:60]}...")
|
||||||
|
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|||||||
@ -1,372 +1,250 @@
|
|||||||
# Windows Setup Guide — Local LLM Stack on Razer Blade 18
|
# Windows Setup Guide — Local LLM Stack on Razer Blade 18
|
||||||
|
|
||||||
> **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
|
> **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
|
||||||
> **OS:** Windows 11 Home
|
> **OS:** Windows 11 Home + WSL2 (Ubuntu)
|
||||||
> **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
|
> **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
|
||||||
> **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs
|
> **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Prerequisites
|
## Architecture: Windows-Native + WSL2
|
||||||
|
|
||||||
### 1. Windows Package Manager
|
```
|
||||||
|
┌────────────────────────────────────────────────────────┐
|
||||||
Install **winget** (ships with Windows 11) and optionally **Scoop** for CLI tools:
|
│ Windows 11 │
|
||||||
|
│ ├── NVIDIA drivers + CUDA (native) │
|
||||||
```powershell
|
│ ├── Ollama (native Windows service, port 11434) │
|
||||||
# Verify winget
|
│ └── Browser → http://localhost:3000 │
|
||||||
winget --version
|
│ │
|
||||||
|
│ ┌──────────────────────────────────────────────────┐ │
|
||||||
# Install Scoop (optional, useful for dev tools)
|
│ │ WSL2 (Ubuntu 24.04) │ │
|
||||||
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
│ │ ├── Node.js, Python 3.12, ffmpeg, git │ │
|
||||||
Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
|
│ │ ├── __LOCAL_LLMs/ (cloned here) │ │
|
||||||
|
│ │ │ ├── dashboard/ → npm run dev (port 3000) │ │
|
||||||
|
│ │ │ ├── setup-tts.sh (works as-is) │ │
|
||||||
|
│ │ │ ├── start-dashboard.sh (works as-is) │ │
|
||||||
|
│ │ │ └── models/ (SNAC, Qwen3-TTS) │ │
|
||||||
|
│ │ ├── whisper-cpp (CUDA build) │ │
|
||||||
|
│ │ └── .venv-qwen-tts/ (PyTorch CUDA) │ │
|
||||||
|
│ └──────────────────────────────────────────────────┘ │
|
||||||
|
└────────────────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. NVIDIA CUDA Toolkit
|
**Why WSL2?** All existing bash scripts, Python venvs, and Node.js tooling work identically to macOS — zero porting. The dashboard API routes auto-detect macOS vs Linux at runtime via `process.platform`.
|
||||||
|
|
||||||
The RTX 5090 needs the latest CUDA drivers for GPU-accelerated inference.
|
---
|
||||||
|
|
||||||
|
## Phase 1: Windows-Native Setup
|
||||||
|
|
||||||
|
### 1. NVIDIA Drivers
|
||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
# Install NVIDIA drivers (latest Game Ready or Studio)
|
# Install latest NVIDIA Game Ready or Studio drivers
|
||||||
winget install --id Nvidia.GeForceExperience
|
# Download from: https://www.nvidia.com/Download/index.aspx
|
||||||
|
|
||||||
# Install CUDA Toolkit (required for PyTorch CUDA)
|
|
||||||
winget install --id Nvidia.CUDA
|
|
||||||
# Or download from: https://developer.nvidia.com/cuda-downloads
|
|
||||||
|
|
||||||
# Verify
|
# Verify
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
|
# Should show: RTX 5090, 24 GB VRAM, CUDA 13.x+
|
||||||
```
|
```
|
||||||
|
|
||||||
Expected output should show:
|
### 2. Ollama (Windows-Native)
|
||||||
|
|
||||||
- **RTX 5090** with **24 GB** VRAM
|
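Ollama runs natively on Windows. It is reachable from WSL2 at `localhost:11434` when WSL2 uses mirrored networking mode (`networkingMode=mirrored` in `.wslconfig`); under the default NAT mode, `localhost` inside WSL2 refers to the Linux VM itself, so use the Windows host address instead (see Troubleshooting).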
|
||||||
- CUDA version 13.x+
|
|
||||||
|
|
||||||
### 3. Node.js (for Mission Control Dashboard)
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
winget install --id OpenJS.NodeJS.LTS
|
|
||||||
# Verify
|
|
||||||
node --version # should be 20.x+
|
|
||||||
npm --version
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Python 3.12
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
winget install --id Python.Python.3.12
|
|
||||||
# Verify
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. Git
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
winget install --id Git.Git
|
|
||||||
```
|
|
||||||
|
|
||||||
### 6. ffmpeg
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
winget install --id Gyan.FFmpeg
|
|
||||||
# Or: scoop install ffmpeg
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Ollama — LLM Server
|
|
||||||
|
|
||||||
### Install
|
|
||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
winget install --id Ollama.Ollama
|
winget install --id Ollama.Ollama
|
||||||
```
|
|
||||||
|
|
||||||
Ollama for Windows runs as a background service and automatically uses CUDA (RTX 5090).
|
|
||||||
|
|
||||||
### Verify
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
ollama --version
|
|
||||||
curl http://localhost:11434/api/tags
|
|
||||||
```
|
|
||||||
|
|
||||||
### Download Models
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
# Coding
|
|
||||||
ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
|
|
||||||
ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
|
|
||||||
|
|
||||||
# Reasoning
|
|
||||||
ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
|
|
||||||
|
|
||||||
# General
|
|
||||||
ollama pull llama3.1:8b # 4.9 GB — fast general tasks
|
|
||||||
|
|
||||||
# TTS
|
|
||||||
ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
|
|
||||||
|
|
||||||
# Verify
|
# Verify
|
||||||
ollama list
|
ollama --version
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Note:** With 24 GB VRAM, Ollama will offload 32B models almost entirely to GPU.
|
### 3. Pull Models (from Windows or WSL2)
|
||||||
> On macOS (48 GB unified), the 32B models run in shared CPU/GPU memory.
|
|
||||||
> On this machine, **GPU inference will be significantly faster** for models that fit in 24 GB VRAM.
|
|
||||||
|
|
||||||
### VRAM Budget (RTX 5090 — 24 GB)
|
```bash
|
||||||
|
ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
|
||||||
|
ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
|
||||||
|
ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
|
||||||
|
ollama pull llama3.1:8b # 4.9 GB — fast general tasks
|
||||||
|
ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
|
||||||
|
|
||||||
| Model | VRAM Usage | Fits in GPU? |
|
ollama list # verify all 5 models
|
||||||
| ---------------------------- | ---------- | ------------ |
|
```
|
||||||
| llama3.1:8b | ~5 GB | ✅ Fully |
|
|
||||||
| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
|
### 4. Install WSL2
|
||||||
| sematre/orpheus:en | ~4 GB | ✅ Fully |
|
|
||||||
| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
|
```powershell
|
||||||
| deepseek-r1:32b | ~19 GB | ✅ Fully |
|
# From PowerShell (Admin)
|
||||||
| Two 7B models simultaneously | ~10 GB | ✅ Both fit |
|
wsl --install -d Ubuntu-24.04
|
||||||
|
# Reboot if prompted, then set up username/password
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 2. Whisper.cpp — Speech-to-Text
|
## Phase 2: WSL2 Setup
|
||||||
|
|
||||||
### Option A: Pre-built Binary (Recommended)
|
### 1. Install Dependencies
|
||||||
|
|
||||||
Download the latest release from GitHub:
|
```bash
|
||||||
|
# Update
|
||||||
|
sudo apt update && sudo apt upgrade -y
|
||||||
|
|
||||||
```powershell
|
# Node.js 20 LTS
|
||||||
# Create whisper directory
|
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
|
||||||
mkdir "$env:USERPROFILE\whisper-cpp"
|
sudo apt install -y nodejs
|
||||||
cd "$env:USERPROFILE\whisper-cpp"
|
|
||||||
|
|
||||||
# Download latest release (CUDA build)
|
# Python 3.12
|
||||||
# Check: https://github.com/ggerganov/whisper.cpp/releases
|
sudo apt install -y python3.12 python3.12-venv python3-pip
|
||||||
# Look for: whisper-cublas-bin-x64.zip or whisper-cuda-bin-x64.zip
|
|
||||||
|
# Build tools + ffmpeg
|
||||||
|
sudo apt install -y ffmpeg git curl build-essential cmake
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
node --version # 20.x+
|
||||||
|
python3.12 --version
|
||||||
|
nvidia-smi # should show RTX 5090 (GPU passthrough from Windows)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Option B: Build from Source (CUDA)
|
> **Important:** Do NOT install NVIDIA drivers inside WSL2. The Windows-side driver handles GPU passthrough automatically.
|
||||||
|
|
||||||
```powershell
|
### 2. Clone Repo
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ~/code/mygh && cd ~/code/mygh
|
||||||
|
git clone https://github.com/saravanakumardb1/learning_ai_common_plat.git
|
||||||
|
cd learning_ai_common_plat/__LOCAL_LLMs
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Performance note:** Always clone inside the WSL2 filesystem (`~/code/...`), NOT in `/mnt/c/` — the Windows filesystem bridge is very slow for `node_modules`.
|
||||||
|
|
||||||
|
### 3. Whisper.cpp (CUDA build)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~
|
||||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
cd whisper.cpp
|
cd whisper.cpp
|
||||||
cmake -B build -DGGML_CUDA=ON
|
cmake -B build -DGGML_CUDA=ON
|
||||||
cmake --build build --config Release
|
cmake --build build --config Release -j$(nproc)
|
||||||
```
|
sudo cp build/bin/whisper-cli /usr/local/bin/
|
||||||
|
|
||||||
### Download Whisper Model
|
# Download model (1.5 GB)
|
||||||
|
mkdir -p ~/whisper-models
|
||||||
```powershell
|
curl -L -o ~/whisper-models/ggml-large-v3-turbo.bin \
|
||||||
mkdir "$env:USERPROFILE\whisper-models"
|
|
||||||
|
|
||||||
# Download ggml-large-v3-turbo (1.5 GB)
|
|
||||||
curl -L -o "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" `
|
|
||||||
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
|
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
whisper-cli --version
|
||||||
```
|
```
|
||||||
|
|
||||||
> **No corporate proxy on this machine** — download directly from `huggingface.co`.
|
> **No corporate proxy on this machine** — download directly from `huggingface.co`.
|
||||||
> The `hf-mirror.com` workaround is only needed on the corporate MacBook.
|
|
||||||
|
|
||||||
### Verify
|
### 4. TTS Setup (One-Shot)
|
||||||
|
|
||||||
```powershell
|
```bash
|
||||||
# Test transcription
|
cd ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs
|
||||||
whisper-cli -m "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" -f test.wav
|
|
||||||
|
# Works exactly like macOS — downloads SNAC, Qwen3-TTS, creates venv
|
||||||
|
bash setup-tts.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The script detects macOS vs Linux and installs the correct PyTorch variant (MPS on macOS, CUDA on Linux). On a personal machine, override the default HuggingFace mirror: `HF_MIRROR=https://huggingface.co bash setup-tts.sh`
|
||||||
|
|
||||||
|
### 5. Start Dashboard
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash start-dashboard.sh
|
||||||
|
# Open http://localhost:3000 in Windows browser
|
||||||
|
```
|
||||||
|
|
||||||
|
WSL2 automatically forwards ports — the dashboard is accessible from Windows at `localhost:3000`.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 3. TTS — Orpheus + Qwen3-TTS
|
## Key Differences: macOS vs WSL2
|
||||||
|
|
||||||
### 3a. Orpheus TTS (via Ollama)
|
| Area | macOS (any Mac) | WSL2 (any Linux) |
|
||||||
|
| ---------------------- | --------------------------- | -------------------------------------- |
|
||||||
Already handled in Step 1 (`ollama pull sematre/orpheus:en`).
|
| **GPU** | Apple Silicon (MPS) | NVIDIA (CUDA) |
|
||||||
|
| **Ollama** | macOS native (Metal) | Windows native, accessed via localhost |
|
||||||
### 3b. SNAC Decoder
|
| **PyTorch device** | `mps` | `cuda` |
|
||||||
|
| **Whisper install** | `brew install whisper-cpp` | Build from source with CUDA |
|
||||||
```powershell
|
| **Package manager** | Homebrew | apt |
|
||||||
# Create models directory (match macOS layout)
|
| **Shell scripts** | Work as-is | Work as-is |
|
||||||
$MODELS = "$PSScriptRoot\models" # or wherever you clone the repo
|
| **Python venv path** | `bin/python` | `bin/python` (same) |
|
||||||
mkdir "$MODELS\snac_24khz" -Force
|
| **Dashboard** | Identical | Identical |
|
||||||
|
| **Ollama models path** | `~/.ollama/models/` | Windows `%USERPROFILE%\.ollama\models\` |
|
||||||
# Download SNAC decoder
|
| **Model download** | `hf-mirror.com` (corporate) | `huggingface.co` (direct) |
|
||||||
curl -L -o "$MODELS\snac_24khz\config.json" `
|
|
||||||
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/config.json"
|
|
||||||
curl -L -o "$MODELS\snac_24khz\pytorch_model.bin" `
|
|
||||||
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3c. Python Venv + Dependencies
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
cd __LOCAL_LLMs
|
|
||||||
|
|
||||||
# Create venv
|
|
||||||
python -m venv .venv-qwen-tts
|
|
||||||
|
|
||||||
# Activate (Windows uses Scripts, not bin)
|
|
||||||
.\.venv-qwen-tts\Scripts\Activate.ps1
|
|
||||||
|
|
||||||
# Install PyTorch with CUDA (NOT MPS — that's Apple only)
|
|
||||||
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
|
|
||||||
|
|
||||||
# Install other deps
|
|
||||||
pip install snac numpy soundfile
|
|
||||||
|
|
||||||
# Verify CUDA
|
|
||||||
python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, Device: {torch.cuda.get_device_name(0)}')"
|
|
||||||
# Expected: CUDA: True, Device: NVIDIA GeForce RTX 5090 Laptop GPU
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3d. Qwen3-TTS 0.6B
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
$MODELS = ".\models"
|
|
||||||
|
|
||||||
# Tokenizer (~650 MB)
|
|
||||||
mkdir "$MODELS\Qwen3-TTS-Tokenizer-12Hz" -Force
|
|
||||||
foreach ($f in @("config.json", "configuration.json", "preprocessor_config.json")) {
|
|
||||||
curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\$f" `
|
|
||||||
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f"
|
|
||||||
}
|
|
||||||
curl -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\model.safetensors" `
|
|
||||||
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
|
|
||||||
|
|
||||||
# Model weights (~1.8 GB)
|
|
||||||
mkdir "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice" -Force
|
|
||||||
foreach ($f in @("config.json", "generation_config.json")) {
|
|
||||||
curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\$f" `
|
|
||||||
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f"
|
|
||||||
}
|
|
||||||
curl -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\model.safetensors" `
|
|
||||||
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3e. Test TTS
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
# Activate venv
|
|
||||||
.\.venv-qwen-tts\Scripts\Activate.ps1
|
|
||||||
|
|
||||||
# Orpheus TTS test
|
|
||||||
python test_orpheus_tts.py
|
|
||||||
|
|
||||||
# Qwen3-TTS test
|
|
||||||
python test_qwen_tts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
> **Key difference from macOS:** Qwen3-TTS will use **CUDA** instead of MPS.
|
|
||||||
> In `test_qwen_tts.py`, the device selection `torch.device("mps")` will fall through to CUDA automatically
|
|
||||||
> since `torch.backends.mps.is_available()` returns False on Windows.
|
|
||||||
> You may want to update the device logic to prefer CUDA:
|
|
||||||
>
|
|
||||||
> ```python
|
|
||||||
> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
||||||
> ```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4. Mission Control Dashboard
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
cd __LOCAL_LLMs\dashboard
|
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
npm install
|
|
||||||
|
|
||||||
# Start dev server
|
|
||||||
npm run dev
|
|
||||||
# Open http://localhost:3000
|
|
||||||
```
|
|
||||||
|
|
||||||
The dashboard is pure Next.js — works identically on Windows. The API routes auto-detect:
|
|
||||||
|
|
||||||
- **Ollama** at `localhost:11434`
|
|
||||||
- **Whisper** models in `%USERPROFILE%\whisper-models\`
|
|
||||||
- **TTS** engines (Orpheus, Qwen3-TTS) and Python venv
|
|
||||||
|
|
||||||
### Start Script (PowerShell)
|
|
||||||
|
|
||||||
Use the bash script equivalent:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
# Quick start (manual)
|
|
||||||
ollama serve # if not already running as service
|
|
||||||
cd __LOCAL_LLMs\dashboard
|
|
||||||
npm run dev
|
|
||||||
```
|
|
||||||
|
|
||||||
> TODO: Create `start-dashboard.ps1` as a PowerShell equivalent of `start-dashboard.sh`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 5. Key Differences: macOS vs Windows
|
|
||||||
|
|
||||||
| Area | macOS (M4 Pro 48 GB) | Windows (Razer Blade 18) |
|
|
||||||
| ------------------- | ----------------------------------- | ------------------------------------- |
|
|
||||||
| **GPU** | Apple Silicon (unified memory, MPS) | RTX 5090 (24 GB VRAM, CUDA) |
|
|
||||||
| **Ollama GPU** | Automatic (Metal) | Automatic (CUDA) |
|
|
||||||
| **VRAM** | Shared from 48 GB RAM | Dedicated 24 GB GDDR7 |
|
|
||||||
| **PyTorch device** | `mps` | `cuda` |
|
|
||||||
| **Whisper install** | `brew install whisper-cpp` | Build from source or download release |
|
|
||||||
| **Python venv** | `bin/activate` | `Scripts\Activate.ps1` |
|
|
||||||
| **Package manager** | Homebrew | winget / scoop |
|
|
||||||
| **Shell** | zsh / bash | PowerShell / cmd |
|
|
||||||
| **Scripts** | `.sh` (bash) | `.ps1` (PowerShell) |
|
|
||||||
| **Model download** | `hf-mirror.com` (corporate proxy) | `huggingface.co` (no proxy) |
|
|
||||||
| **Dashboard** | Identical | Identical |
|
|
||||||
| **Ollama models** | Identical | Identical |
|
|
||||||
|
|
||||||
### Performance Expectations
|
### Performance Expectations
|
||||||
|
|
||||||
| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
|
| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
|
||||||
| --------------------------- | ---------------------------- | ------------------------- |
|
| --------------------------- | -------------------- | ------------------------------- |
|
||||||
| qwen2.5-coder:32b inference | ~15–25 tok/s (MPS/CPU blend) | ~40–60 tok/s (full CUDA) |
|
| qwen2.5-coder:32b inference | ~15–25 tok/s | ~40–60 tok/s |
|
||||||
| Whisper large-v3-turbo | ~2–4x realtime (CPU) | ~8–15x realtime (CUDA) |
|
| Whisper large-v3-turbo | ~2–4x realtime | ~8–15x realtime |
|
||||||
| Orpheus TTS | ~realtime (CPU decode) | ~2–3x realtime (CUDA) |
|
| Orpheus TTS | ~realtime | ~2–3x realtime |
|
||||||
| Qwen3-TTS | ~realtime (MPS) | ~2–4x realtime (CUDA) |
|
| Qwen3-TTS | ~realtime (MPS) | ~2–4x realtime (CUDA) |
|
||||||
| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to RAM |
|
| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to 64 GB RAM |
|
||||||
|
|
||||||
|
### VRAM Budget (RTX 5090 — 24 GB)
|
||||||
|
|
||||||
|
| Model | VRAM Usage | Fits in GPU? |
|
||||||
|
| ------------------ | ---------- | ------------ |
|
||||||
|
| llama3.1:8b | ~5 GB | ✅ Fully |
|
||||||
|
| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
|
||||||
|
| sematre/orpheus:en | ~4 GB | ✅ Fully |
|
||||||
|
| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
|
||||||
|
| deepseek-r1:32b | ~19 GB | ✅ Fully |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 6. File Layout (Same as macOS)
|
## Quick Reference — Full Setup Checklist
|
||||||
|
|
||||||
|
### Windows Side
|
||||||
|
|
||||||
```
|
```
|
||||||
__LOCAL_LLMs/
|
[ ] Install NVIDIA drivers (Game Ready or Studio)
|
||||||
├── dashboard/ ← Mission Control (port 3000) — works as-is
|
|
||||||
├── models/ ← TTS model weights (gitignored)
|
|
||||||
│ ├── snac_24khz/
|
|
||||||
│ ├── Qwen3-TTS-Tokenizer-12Hz/
|
|
||||||
│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/
|
|
||||||
├── .venv-qwen-tts/ ← Python venv (Scripts\ on Windows)
|
|
||||||
├── test_orpheus_tts.py ← works as-is (device fallback)
|
|
||||||
├── test_qwen_tts.py ← update device to prefer CUDA
|
|
||||||
├── windows_specific/
|
|
||||||
│ ├── razer-blade-18-spec.md ← hardware spec
|
|
||||||
│ └── setup-guide.md ← this file
|
|
||||||
└── docs/ ← macOS-focused docs (still useful as reference)
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 7. Quick Reference — Full Setup Checklist
|
|
||||||
|
|
||||||
```
|
|
||||||
[ ] Install NVIDIA drivers + CUDA Toolkit
|
|
||||||
[ ] Install Ollama (winget install Ollama.Ollama)
|
[ ] Install Ollama (winget install Ollama.Ollama)
|
||||||
[ ] Pull models: qwen2.5-coder:32b, deepseek-r1:32b, llama3.1:8b, orpheus
|
[ ] Pull all 5 models
|
||||||
[ ] Install Node.js 20+ (winget)
|
[ ] Install WSL2 (wsl --install -d Ubuntu-24.04)
|
||||||
[ ] Install Python 3.12 (winget)
|
```
|
||||||
[ ] Install Git (winget)
|
|
||||||
[ ] Install ffmpeg (winget)
|
### WSL2 Side
|
||||||
[ ] Clone repo
|
|
||||||
[ ] Download Whisper model to %USERPROFILE%\whisper-models\
|
```
|
||||||
[ ] Build or download whisper-cpp with CUDA
|
[ ] Install Node.js 20+, Python 3.12, ffmpeg, git, cmake
|
||||||
[ ] Create Python venv + install PyTorch CUDA + snac
|
[ ] Verify nvidia-smi shows RTX 5090
|
||||||
[ ] Download SNAC decoder
|
[ ] Clone repo into ~/code/mygh/
|
||||||
[ ] Download Qwen3-TTS tokenizer + model
|
[ ] Build whisper-cpp with CUDA
|
||||||
[ ] npm install in dashboard/
|
[ ] Download Whisper model to ~/whisper-models/
|
||||||
[ ] Run dashboard: npm run dev
|
[ ] Run: bash setup-tts.sh
|
||||||
|
[ ] Run: bash start-dashboard.sh
|
||||||
[ ] Verify: http://localhost:3000 shows all green
|
[ ] Verify: http://localhost:3000 shows all green
|
||||||
```
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Ollama not accessible from WSL2
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
# If this fails, check the Windows firewall or try:
|
||||||
|
curl http://$(hostname).local:11434/api/tags
|
||||||
|
```
|
||||||
|
|
||||||
|
### CUDA not visible in WSL2
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nvidia-smi
|
||||||
|
# If "command not found":
|
||||||
|
# 1. Update Windows NVIDIA drivers to latest
|
||||||
|
# 2. Run: wsl --update
|
||||||
|
# 3. Do NOT install nvidia-driver-* inside WSL2
|
||||||
|
```
|
||||||
|
|
||||||
|
### Slow filesystem performance
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone repos inside WSL2 filesystem: ~/code/...
|
||||||
|
# NOT in /mnt/c/ (Windows→WSL bridge is ~10x slower for node_modules)
|
||||||
|
```
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user