fix(local-llms): harden WSL setup and dashboard Ollama connectivity

Stabilize Windows+WSL setup by fixing script line-ending pitfalls, WSL Ollama host detection, and dashboard startup behavior so models are detected reliably in Mission Control.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
saravanakumardb1 2026-02-22 16:44:56 -08:00
parent 6dca1bd6f1
commit 112341e162
8 changed files with 857 additions and 659 deletions

2
.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
# Bash scripts must use LF so they run in WSL/Linux
*.sh text eol=lf

File diff suppressed because it is too large Load Diff

View File

@ -1 +1,39 @@
export const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';
import { execSync } from 'child_process';
import fs from 'fs';
/**
 * Normalizes a user-supplied Ollama endpoint into a URL string.
 *
 * Surrounding whitespace and any trailing slashes are removed. When the value
 * does not already start with an `http://` or `https://` scheme, `http://` is
 * prepended so bare `host:port` values work.
 *
 * @param input Raw endpoint value, e.g. `localhost:11434` or `https://host/`.
 * @returns The endpoint with a scheme and without trailing slashes.
 */
function normalizeUrl(input: string): string {
  const trimmed = input.trim().replace(/\/+$/, '');
  // URL schemes are case-insensitive, so `HTTP://host` must not be prefixed a second time.
  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed;
  }
  return `http://${trimmed}`;
}
/**
 * Guesses the URL of an Ollama server reachable through the default gateway
 * when the process appears to run inside WSL.
 *
 * The environment is treated as WSL when the platform is Linux and
 * `/proc/version` contains "microsoft". The gateway is the third field of the
 * first line printed by `ip route show default`.
 *
 * @returns `http://<gateway>:11434`, or `null` when not on WSL, when no IPv4
 *   gateway address is found, or when any step fails.
 */
function detectWslGatewayOllamaUrl(): string | null {
  try {
    if (process.platform !== 'linux') return null;
    const version = fs.readFileSync('/proc/version', 'utf-8').toLowerCase();
    if (!version.includes('microsoft')) return null;
    const routes = execSync('ip route show default', {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    const firstLine = routes.split('\n')[0] ?? '';
    const gw = firstLine.trim().split(/\s+/)[2] ?? '';
    // A default route without "via" has an interface name in this position;
    // only accept something shaped like a dotted IPv4 address.
    if (!/^\d{1,3}(\.\d{1,3}){3}$/.test(gw)) return null;
    return `http://${gw}:11434`;
  } catch {
    // Detection is best-effort: an unreadable /proc/version or a missing `ip`
    // binary simply means "no inferred URL".
    return null;
  }
}
// Explicit configuration: OLLAMA_URL wins over OLLAMA_HOST; empty values count as unset.
const explicit = process.env.OLLAMA_URL || process.env.OLLAMA_HOST;
// Gateway detection reads /proc/version and spawns a subprocess, so skip it
// when an explicit value already decides the result.
const inferred = explicit ? null : detectWslGatewayOllamaUrl();
// Priority:
// 1) OLLAMA_URL / OLLAMA_HOST (explicit config)
// 2) WSL2 gateway (Windows-hosted Ollama)
// 3) localhost default
export const OLLAMA_URL = explicit
  ? normalizeUrl(explicit)
  : (inferred ?? 'http://localhost:11434');

View File

@ -1,6 +1,6 @@
#!/bin/bash
# ============================================================
# TTS Setup One-Shot Script for Fresh Laptop
# TTS Setup - One-Shot Script for Fresh Laptop
#
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
# on macOS (Apple Silicon) or Linux (CUDA GPU / WSL2).
@ -18,6 +18,7 @@
#
# Usage:
# bash setup-tts.sh
# From WSL with Ollama on Windows: OLLAMA_HOST=http://<Windows_IP>:11434 bash setup-tts.sh
#
# After setup, test with:
# .venv-qwen-tts/bin/python test_orpheus_tts.py
@ -32,6 +33,21 @@ MODELS_DIR="$SCRIPT_DIR/models"
# On personal machines, set HF_MIRROR=https://huggingface.co to download directly
HF_MIRROR="${HF_MIRROR:-https://hf-mirror.com}"
# Ollama API base URL (for WSL2 with Ollama on Windows, set OLLAMA_HOST=http://<Windows_IP>:11434)
OLLAMA_BASE="${OLLAMA_HOST:-http://localhost:11434}"
OLLAMA_BASE="${OLLAMA_BASE%/}"
# WSL2: if localhost fails, try Windows host via default gateway (common when Ollama runs on Windows)
if [ "$OLLAMA_BASE" = "http://localhost:11434" ] && ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
WIN_HOST=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
if [ -n "$WIN_HOST" ] && curl -s --max-time 2 "http://${WIN_HOST}:11434/api/tags" &>/dev/null; then
OLLAMA_BASE="http://${WIN_HOST}:11434"
export OLLAMA_HOST="${OLLAMA_BASE}"
fi
fi
fi
export OLLAMA_HOST="${OLLAMA_HOST:-$OLLAMA_BASE}"
# Detect OS
OS_TYPE="$(uname -s)"
IS_MAC=false
@ -42,18 +58,18 @@ GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
ok() { echo -e "${GREEN}${NC} $1"; }
warn() { echo -e "${YELLOW}${NC} $1"; }
fail() { echo -e "${RED}${NC} $1"; exit 1; }
ok() { echo -e "${GREEN}[OK]${NC} $1"; }
warn() { echo -e "${YELLOW}[!!]${NC} $1"; }
fail() { echo -e "${RED}[FAIL]${NC} $1"; exit 1; }
step() { echo -e "\n${GREEN}=== $1 ===${NC}"; }
echo "╔══════════════════════════════════════════════╗"
echo "║ TTS Setup — Local Speech Generation ║"
echo "║ Orpheus TTS (Ollama) + Qwen3-TTS (Python) ║"
echo "╚══════════════════════════════════════════════╝"
echo "+------------------------------------------------------+"
echo "| TTS Setup - Local Speech Generation |"
echo "| Orpheus TTS (Ollama) + Qwen3-TTS (Python) |"
echo "+------------------------------------------------------+"
echo ""
# ── 0. Check prerequisites ──────────────────────────────────
# -- 0. Check prerequisites -----------------------------------
step "Checking prerequisites"
if $IS_MAC; then
@ -69,29 +85,29 @@ if $IS_MAC; then
brew install ollama
fi
else
# Linux / WSL2 Ollama should be installed on host or via install script
# Linux / WSL2 - Ollama should be installed on host or via install script
if ! command -v ollama &>/dev/null; then
# On WSL2 Ollama runs on the Windows side; check if reachable
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
fail "Ollama not found and not reachable at localhost:11434. Install Ollama on Windows or run: curl -fsSL https://ollama.com/install.sh | sh"
if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
fail "Ollama not reachable at $OLLAMA_BASE. From WSL set OLLAMA_HOST=http://<Windows_IP>:11434 (e.g. from /etc/resolv.conf nameserver)."
fi
ok "Ollama reachable at localhost:11434 (Windows host)"
ok "Ollama reachable at $OLLAMA_BASE (Windows host)"
fi
fi
ok "Ollama installed"
# Check if Ollama is running
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
warn "Ollama not running. Starting..."
if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
warn "Ollama not running at $OLLAMA_BASE. Starting..."
if command -v ollama &>/dev/null; then
ollama serve &>/dev/null &
sleep 3
fi
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
fail "Could not start Ollama. Try manually: ollama serve"
if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
fail "Could not start Ollama. Try manually: ollama serve (or from WSL set OLLAMA_HOST=http://<Windows_IP>:11434)"
fi
fi
ok "Ollama running on port 11434"
ok "Ollama running at $OLLAMA_BASE"
# GPU check
ARCH=$(uname -m)
@ -99,18 +115,18 @@ if $IS_MAC; then
if [ "$ARCH" != "arm64" ]; then
warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
else
ok "Apple Silicon ($ARCH) MPS acceleration available"
ok "Apple Silicon ($ARCH) - MPS acceleration available"
fi
else
if command -v nvidia-smi &>/dev/null; then
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
ok "NVIDIA GPU detected: $GPU_NAME CUDA acceleration available"
ok "NVIDIA GPU detected: $GPU_NAME - CUDA acceleration available"
else
warn "nvidia-smi not found. CUDA acceleration won't be available (CPU fallback)."
fi
fi
# ── 1. Install Python 3.12 ──────────────────────────────────
# -- 1. Install Python 3.12 -----------------------------------
step "Python 3.12"
PYTHON_CMD=""
@ -137,7 +153,7 @@ fi
PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
ok "$PYTHON_VER at $PYTHON_CMD"
# ── 2. Create venv ──────────────────────────────────────────
# -- 2. Create venv --------------------------------------------
step "Python virtual environment"
if [ -f "$VENV/bin/python" ]; then
@ -148,7 +164,7 @@ else
ok "Venv created at $VENV"
fi
# ── 3. Install Python packages ──────────────────────────────
# -- 3. Install Python packages --------------------------------
step "Python packages"
# Check if snac is installed (quick proxy for all packages)
@ -169,18 +185,38 @@ else
ok "Packages installed"
fi
# ── 4. Pull Orpheus TTS model ───────────────────────────────
# -- 4. Pull Orpheus TTS model ---------------------------------
step "Orpheus TTS model (Ollama)"
if ollama list 2>/dev/null | grep -q "orpheus"; then
# Helper: use ollama CLI if available, otherwise use API (WSL2 where CLI is on Windows)
ollama_list() {
  # Print the models known to Ollama. Without a local `ollama` binary on PATH,
  # fall back to the HTTP API at $OLLAMA_BASE and print each model's name via
  # python3. Error output is discarded on both paths.
  if ! command -v ollama >/dev/null 2>&1; then
    curl -s "$OLLAMA_BASE/api/tags" 2>/dev/null | python3 -c "import sys,json; [print(m['name']) for m in json.load(sys.stdin).get('models',[])]" 2>/dev/null
  else
    ollama list 2>/dev/null
  fi
}
ollama_pull() {
  # Pull model "$1": through the local `ollama` CLI when it is on PATH,
  # otherwise through the HTTP API at $OLLAMA_BASE.
  # Returns non-zero when the pull did not succeed, so the caller does not
  # report a download that never happened.
  local model="$1"
  if command -v ollama &>/dev/null; then
    ollama pull "$model"
  else
    echo " (Using Ollama API at $OLLAMA_BASE to pull model...)"
    local response
    # curl exits 0 even when the server replies with an error body, so capture
    # the reply and inspect it. A transport failure (e.g. the 600s timeout)
    # leaves the response empty and is treated as a failed pull below.
    response=$(curl -s -X POST "$OLLAMA_BASE/api/pull" -d "{\"name\":\"$model\",\"stream\":false}" --max-time 600) || response=""
    printf '%s' "$response"
    echo ""
    # With "stream": false the API answers with one JSON object whose status is "success".
    if ! printf '%s' "$response" | grep -q '"status"[[:space:]]*:[[:space:]]*"success"'; then
      echo " Ollama API pull of $model did not report success." >&2
      return 1
    fi
  fi
}
if ollama_list | grep -q "orpheus"; then
ok "Orpheus TTS already downloaded"
else
echo "Pulling sematre/orpheus:en (4 GB)..."
NO_PROXY="ollama.com,registry.ollama.ai" ollama pull sematre/orpheus:en
ollama_pull "sematre/orpheus:en"
ok "Orpheus TTS downloaded"
fi
# ── 5. Download SNAC decoder ────────────────────────────────
# -- 5. Download SNAC decoder ----------------------------------
step "SNAC 24kHz audio decoder (~76 MB)"
mkdir -p "$MODELS_DIR/snac_24khz"
@ -235,7 +271,7 @@ else
fail "SNAC decoder failed to load. Delete models/snac_24khz/ and re-run."
fi
# ── 6. (Optional) Download Qwen3-TTS ────────────────────────
# -- 6. (Optional) Download Qwen3-TTS --------------------------
step "Qwen3-TTS 0.6B (optional, ~1.7 GB total)"
QWEN_TOKENIZER_DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
@ -245,7 +281,7 @@ if [ -d "$QWEN_MODEL_DIR" ] && [ -f "$QWEN_MODEL_DIR/config.json" ]; then
ok "Qwen3-TTS already downloaded"
else
echo "Qwen3-TTS 0.6B requires ~1.7 GB download (tokenizer + model)."
echo "This is optional Orpheus TTS (above) works without it."
echo "This is optional - Orpheus TTS (above) works without it."
read -p "Download Qwen3-TTS? [y/N] " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
@ -275,17 +311,17 @@ else
fi
fi
# ── Summary ──────────────────────────────────────────────────
# -- Summary ---------------------------------------------------
step "Setup Complete"
echo ""
echo "Installed components:"
echo " Orpheus TTS (Ollama): $(ollama list 2>/dev/null | grep orpheus | awk '{print $NF}' || echo 'ready')"
echo " Orpheus TTS (Ollama): $(ollama_list 2>/dev/null | grep orpheus | head -1 || echo 'ready')"
echo " SNAC decoder: $MODELS_DIR/snac_24khz/"
if [ -d "$QWEN_MODEL_DIR" ] && [ -f "$QWEN_MODEL_DIR/config.json" ]; then
echo " Qwen3-TTS 0.6B: $QWEN_MODEL_DIR/"
else
echo " Qwen3-TTS 0.6B: (not installed re-run setup to add)"
echo " Qwen3-TTS 0.6B: (not installed - re-run setup to add)"
fi
echo ""
echo "Disk usage:"

View File

@ -11,15 +11,27 @@
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DASHBOARD_DIR="$SCRIPT_DIR/dashboard"
PORT=3000
OLLAMA_URL="http://localhost:11434"
# Ollama URL: respect OLLAMA_HOST env var, auto-detect WSL2 gateway if needed
OLLAMA_URL="${OLLAMA_HOST:-http://localhost:11434}"
OLLAMA_URL="${OLLAMA_URL%/}"
if ! curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
if [ -n "$GW" ] && curl -s --max-time 2 "http://${GW}:11434/api/tags" &>/dev/null; then
OLLAMA_URL="http://${GW}:11434"
fi
fi
fi
export OLLAMA_HOST="$OLLAMA_URL"
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'
ok() { echo -e "${GREEN}${NC} $1"; }
warn() { echo -e "${YELLOW}${NC} $1"; }
fail() { echo -e "${RED}${NC} $1"; }
ok() { echo -e "${GREEN}[OK]${NC} $1"; }
warn() { echo -e "${YELLOW}[!!]${NC} $1"; }
fail() { echo -e "${RED}[FAIL]${NC} $1"; }
case "${1:-start}" in
stop)
@ -39,9 +51,9 @@ case "${1:-start}" in
# Ollama
if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
MODELS=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
ok "Ollama running ($MODELS models)"
ok "Ollama running at $OLLAMA_URL ($MODELS models)"
else
fail "Ollama not running"
fail "Ollama not running at $OLLAMA_URL"
fi
# Dashboard
if curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
@ -56,17 +68,21 @@ case "${1:-start}" in
echo "=== Starting Mission Control ==="
echo ""
# 1. Ensure Ollama is running
# 1. Check Ollama connectivity; only try to start it when a local ollama CLI exists (on WSL2 it usually runs on Windows)
if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
ok "Ollama already running"
ok "Ollama running at $OLLAMA_URL"
else
if command -v ollama &>/dev/null; then
echo "Starting Ollama..."
ollama serve &>/dev/null &
sleep 2
if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
ok "Ollama started"
else
fail "Could not start Ollama. Try: ollama serve"
warn "Could not start Ollama. It may be running on Windows already."
fi
else
warn "Ollama not reachable at $OLLAMA_URL (on WSL2, ensure it is running on Windows with OLLAMA_HOST=0.0.0.0:11434)"
fi
fi
@ -83,12 +99,14 @@ case "${1:-start}" in
ok "Dependencies installed"
fi
# 4. Start dashboard
# 4. Start dashboard (use node directly -- npx/next may lack +x on /mnt/ NTFS)
echo "Starting dashboard on port $PORT..."
(cd "$DASHBOARD_DIR" && npm run dev &>/dev/null &)
LOGFILE="$DASHBOARD_DIR/.next-dev.log"
(cd "$DASHBOARD_DIR" && OLLAMA_HOST="$OLLAMA_URL" node node_modules/next/dist/bin/next dev > "$LOGFILE" 2>&1 &)
# Wait for it to be ready
for i in $(seq 1 15); do
# Wait for it to be ready (Next.js on /mnt/d can be slow to compile)
echo " Waiting for Next.js to compile (this can take 1-2 min on /mnt/d)..."
for i in $(seq 1 90); do
if curl -s --max-time 1 "http://localhost:$PORT" &>/dev/null; then
ok "Dashboard ready at http://localhost:$PORT"
echo ""
@ -99,7 +117,11 @@ case "${1:-start}" in
sleep 1
done
fail "Dashboard did not start within 15s. Check: cd dashboard && npm run dev"
warn "Dashboard did not respond within 90s."
echo " It may still be compiling. Check the log:"
echo " tail -f $LOGFILE"
echo " Or start manually:"
echo " cd $DASHBOARD_DIR && npx next dev"
exit 1
;;

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Wrapper: fix line endings, detect Windows host IP, run TTS setup
#
# An OLLAMA_HOST already provided by the caller is kept as-is. Otherwise the
# default gateway and then localhost are probed on port 11434, and the first
# one that answers is exported as OLLAMA_HOST for setup-tts.sh.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
LLMS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Fix CRLF
sed -i 's/\r$//' "$LLMS_DIR/setup-tts.sh" 2>/dev/null

if [ -n "${OLLAMA_HOST:-}" ]; then
  # Explicit configuration wins over auto-detection.
  export OLLAMA_HOST
  echo "Using OLLAMA_HOST from environment: $OLLAMA_HOST"
else
  # Detect Windows host IP via default gateway
  GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
  if [ -n "$GW" ] && curl -s --max-time 2 "http://${GW}:11434/api/tags" >/dev/null 2>&1; then
    export OLLAMA_HOST="http://${GW}:11434"
    echo "Ollama found at $OLLAMA_HOST (via default gateway)"
  elif curl -s --max-time 2 "http://localhost:11434/api/tags" >/dev/null 2>&1; then
    export OLLAMA_HOST="http://localhost:11434"
    echo "Ollama found at $OLLAMA_HOST"
  else
    echo "WARNING: Ollama not found. The script may fail at prerequisite check."
  fi
fi

export HF_MIRROR="${HF_MIRROR:-https://huggingface.co}"

# Without this check a failed cd would exec setup-tts.sh from the wrong directory.
cd "$LLMS_DIR" || { echo "ERROR: cannot change to $LLMS_DIR" >&2; exit 1; }
exec bash setup-tts.sh

View File

@ -1,5 +1,5 @@
# ============================================================
# Windows-Side Setup Local LLM Stack (Razer Blade 18)
# Windows-Side Setup - Local LLM Stack (Razer Blade 18)
#
# Run this FIRST from PowerShell (Admin) on a fresh Windows machine.
# After this completes, open WSL2 and run setup-wsl.sh.
@ -27,12 +27,12 @@ function Write-Step { param($msg) Write-Host "`n=== $msg ===" -ForegroundColor
Write-Host ""
Write-Host " =====================================" -ForegroundColor Cyan
Write-Host " Local LLM Stack Windows Setup" -ForegroundColor Cyan
Write-Host " Local LLM Stack - Windows Setup" -ForegroundColor Cyan
Write-Host " Razer Blade 18 / RTX 5090" -ForegroundColor Cyan
Write-Host " =====================================" -ForegroundColor Cyan
Write-Host ""
# ── 1. Check NVIDIA Drivers ──────────────────────────────────
# -- 1. Check NVIDIA Drivers ----------------------------------
Write-Step "Step 1/4: NVIDIA Drivers + CUDA"
$nvidiaSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
@ -53,7 +53,7 @@ if ($nvidiaSmi) {
if ($continue -ne "y") { exit 1 }
}
# ── 2. Install Ollama ────────────────────────────────────────
# -- 2. Install Ollama -----------------------------------------
Write-Step "Step 2/4: Ollama"
$ollamaCmd = Get-Command ollama -ErrorAction SilentlyContinue
@ -93,9 +93,9 @@ try {
}
}
# ── 3. Pull Models ───────────────────────────────────────────
# -- 3. Pull Models --------------------------------------------
Write-Step "Step 3/4: Pull Ollama Models"
# Curated for coding + reasoning + TTS; update from https://ollama.com/models as needed
$models = @(
@{ name = "qwen2.5-coder:32b"; desc = "19 GB - primary coding model" },
@{ name = "qwen2.5-coder:7b"; desc = "4.7 GB - fast coding" },
@ -117,14 +117,14 @@ try {
foreach ($model in $models) {
$alreadyPulled = $existingModels | Where-Object { $_ -like "*$($model.name)*" }
if ($alreadyPulled) {
Write-OK "$($model.name) already pulled"
Write-OK "$($model.name) - already pulled"
} else {
Write-Host " Pulling $($model.name) ($($model.desc))..."
& ollama pull $model.name
if ($LASTEXITCODE -eq 0) {
Write-OK "$($model.name) pulled"
Write-OK "$($model.name) - pulled"
} else {
Write-Warn "$($model.name) pull failed (you can retry later: ollama pull $($model.name))"
Write-Warn "$($model.name) - pull failed (you can retry later: ollama pull $($model.name))"
}
}
}
@ -134,7 +134,7 @@ Write-Host ""
Write-Host " Installed models:"
& ollama list
# ── 4. Install WSL2 ──────────────────────────────────────────
# -- 4. Install WSL2 -------------------------------------------
Write-Step "Step 4/4: WSL2 (Ubuntu 24.04)"
$wslInstalled = $false
@ -158,19 +158,19 @@ if (-not $wslInstalled) {
Write-Warn "After reboot, Ubuntu will ask you to set up a username/password."
}
# ── Summary ──────────────────────────────────────────────────
Write-Host ""
Write-Host " =====================================" -ForegroundColor Green
Write-Host " Windows-Side Setup Complete!" -ForegroundColor Green
Write-Host " =====================================" -ForegroundColor Green
Write-Host ""
Write-Host " Next steps:" -ForegroundColor Yellow
Write-Host " 1. If WSL2 was just installed, reboot and set up Ubuntu username/password"
Write-Host " 2. Open Ubuntu (WSL2) terminal"
Write-Host " 3. Run the WSL2 setup script:"
Write-Host ""
Write-Host " curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash" -ForegroundColor Cyan
Write-Host ""
Write-Host " Or if you already cloned the repo:"
Write-Host " bash ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs/windows_specific/setup-wsl.sh" -ForegroundColor Cyan
Write-Host ""
# -- Summary ---------------------------------------------------
Write-Host ''
Write-Host ' =====================================' -ForegroundColor Green
Write-Host ' Windows-Side Setup Complete!' -ForegroundColor Green
Write-Host ' =====================================' -ForegroundColor Green
Write-Host ''
Write-Host ' Next steps:' -ForegroundColor Yellow
Write-Host ' 1. If WSL2 was just installed, reboot and set up Ubuntu username/password'
Write-Host ' 2. Open Ubuntu (WSL2) terminal'
Write-Host ' 3. Run the WSL2 setup script:'
Write-Host ''
Write-Host ' curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash' -ForegroundColor Cyan
Write-Host ''
Write-Host ' Or if you already cloned the repo:'
Write-Host ' bash ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs/windows_specific/setup-wsl.sh' -ForegroundColor Cyan
Write-Host ''

View File

@ -1,15 +1,15 @@
#!/bin/bash
# ============================================================
# WSL2-Side Setup Local LLM Stack (Razer Blade 18)
# WSL2-Side Setup - Local LLM Stack (Razer Blade 18)
#
# Run this INSIDE WSL2 (Ubuntu 24.04) after setup-windows.ps1.
# This is a one-shot script safe to re-run (idempotent).
# This is a one-shot script - safe to re-run (idempotent).
#
# What this does:
# 1. Installs system deps (Node.js 20, Python 3.12, ffmpeg, cmake)
# 2. Verifies NVIDIA GPU passthrough (CUDA)
# 3. Clones the repo (if not already cloned)
# 4. Builds whisper.cpp with CUDA
# 4. Builds whisper.cpp with CUDA (auto-detects 120 for Blackwell/RTX 50 when nvcc 12.8+)
# 5. Downloads Whisper large-v3-turbo model
# 6. Runs setup-tts.sh (Orpheus + Qwen3-TTS)
# 7. Starts the Mission Control dashboard
@ -17,6 +17,9 @@
# Usage:
# bash setup-wsl.sh
#
# When run from inside the repo (e.g. script path under __LOCAL_LLMs/), uses
# that repo and skips clone. Or set EXISTING_REPO_PATH=/path/to/repo to use it.
#
# Or one-liner from a fresh WSL2 terminal:
# curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash
# ============================================================
@ -35,6 +38,15 @@ step() { echo -e "\n${CYAN}=== $1 ===${NC}"; }
REPO_URL="https://github.com/saravanakumardb1/learning_ai_common_plat.git"
REPO_DIR="$HOME/code/mygh/learning_ai_common_plat"
# Use existing repo if run from repo path or EXISTING_REPO_PATH is set
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
if [ -n "${EXISTING_REPO_PATH:-}" ]; then
REPO_DIR="$(cd "$EXISTING_REPO_PATH" && pwd)"
elif [ -d "$SCRIPT_DIR/../.." ] && [ -d "$SCRIPT_DIR/../../__LOCAL_LLMs" ]; then
REPO_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
fi
LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
WHISPER_DIR="$HOME/whisper-cpp"
WHISPER_MODEL_DIR="$HOME/whisper-models"
@ -42,12 +54,12 @@ WHISPER_MODEL_FILE="$WHISPER_MODEL_DIR/ggml-large-v3-turbo.bin"
echo ""
echo -e " ${CYAN}=====================================${NC}"
echo -e " ${CYAN}Local LLM Stack WSL2 Setup${NC}"
echo -e " ${CYAN}Local LLM Stack - WSL2 Setup${NC}"
echo -e " ${CYAN}Ubuntu 24.04 + CUDA${NC}"
echo -e " ${CYAN}=====================================${NC}"
echo ""
# ── 1. System Dependencies ───────────────────────────────────
# -- 1. System Dependencies -----------------------------------
step "Step 1/7: System Dependencies"
sudo apt update -qq
@ -92,7 +104,7 @@ if [ -n "$PKGS_TO_INSTALL" ]; then
fi
ok "ffmpeg, git, curl, build-essential, cmake"
# ── 2. NVIDIA GPU Passthrough ────────────────────────────────
# -- 2. NVIDIA GPU Passthrough ---------------------------------
step "Step 2/7: NVIDIA GPU (CUDA passthrough)"
if command -v nvidia-smi &>/dev/null; then
@ -111,15 +123,28 @@ else
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
fi
# ── 3. Verify Ollama (Windows-side) ──────────────────────────
# -- 3. Verify Ollama (Windows-side) ---------------------------
step "Step 3/7: Ollama Connectivity"
if curl -s --max-time 3 http://localhost:11434/api/tags &>/dev/null; then
MODEL_COUNT=$(curl -s http://localhost:11434/api/tags | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
ok "Ollama reachable at localhost:11434 ($MODEL_COUNT models)"
OLLAMA_URL="${OLLAMA_HOST:-http://localhost:11434}"
OLLAMA_URL="${OLLAMA_URL%/}"
# WSL2: if localhost fails, try Windows host via default gateway
if ! curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
if [ -n "$GW" ] && curl -s --max-time 3 "http://${GW}:11434/api/tags" &>/dev/null; then
OLLAMA_URL="http://${GW}:11434"
fi
fi
fi
export OLLAMA_HOST="$OLLAMA_URL"
if curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
MODEL_COUNT=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
ok "Ollama reachable at $OLLAMA_URL ($MODEL_COUNT models)"
else
warn "Ollama not reachable at localhost:11434"
echo " Make sure Ollama is running on the Windows side."
warn "Ollama not reachable at $OLLAMA_URL"
echo " Make sure Ollama is running on Windows with OLLAMA_HOST=0.0.0.0:11434"
echo " Open PowerShell and run: ollama serve"
echo ""
read -p " Continue anyway? (y/N) " -n 1 -r
@ -127,13 +152,41 @@ else
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
fi
# ── 4. Clone Repo ────────────────────────────────────────────
# -- 4. Clone Repo ----------------------------------------------
step "Step 4/7: Clone Repository"
DEFAULT_REPO_DIR="$HOME/code/mygh/learning_ai_common_plat"
# Performance: /mnt/* is slower for node_modules. Keep setup fast by default.
# Optional: set MIRROR_TO_WSL_LOCAL=1 to copy repo to $DEFAULT_REPO_DIR.
if [[ "$REPO_DIR" == /mnt/* ]]; then
warn "Repo is on Windows mount: $REPO_DIR"
echo " This works, but dashboard/node_modules can be slower on /mnt/*."
if [ -d "$DEFAULT_REPO_DIR/__LOCAL_LLMs" ]; then
REPO_DIR="$DEFAULT_REPO_DIR"
LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
ok "Using existing WSL-local repo at $REPO_DIR"
elif [ "${MIRROR_TO_WSL_LOCAL:-0}" = "1" ] && [ -d "$REPO_DIR/.git" ]; then
echo " MIRROR_TO_WSL_LOCAL=1 -> copying repo to:"
echo " $DEFAULT_REPO_DIR"
mkdir -p "$(dirname "$DEFAULT_REPO_DIR")"
cp -a "$REPO_DIR" "$DEFAULT_REPO_DIR"
REPO_DIR="$DEFAULT_REPO_DIR"
LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
ok "Copied repo to $REPO_DIR"
else
echo " Continuing on current path for speed."
echo " Tip: to mirror once into WSL-local FS, re-run with:"
echo " MIRROR_TO_WSL_LOCAL=1 bash setup-wsl.sh"
fi
fi
if [ -d "$LLMS_DIR" ]; then
ok "Repo already cloned at $REPO_DIR"
# Only pull when using the default clone path.
if [ "$REPO_DIR" = "$DEFAULT_REPO_DIR" ]; then
echo " Pulling latest..."
(cd "$REPO_DIR" && git pull --ff-only origin main 2>/dev/null || true)
fi
else
echo " Cloning into $REPO_DIR..."
mkdir -p "$(dirname "$REPO_DIR")"
@ -147,7 +200,7 @@ if [ ! -d "$LLMS_DIR" ]; then
fi
ok "__LOCAL_LLMs directory: $LLMS_DIR"
# ── 5. Build Whisper.cpp (CUDA) ──────────────────────────────
# -- 5. Build Whisper.cpp (CUDA) ---------------------------------
step "Step 5/7: Whisper.cpp (CUDA build)"
if command -v whisper-cli &>/dev/null; then
@ -168,9 +221,27 @@ else
echo " Installing CUDA toolkit for compilation..."
sudo apt install -y nvidia-cuda-toolkit -qq 2>/dev/null || true
fi
cmake -B build -DGGML_CUDA=ON 2>/dev/null || cmake -B build
# Use latest GPU arch supported by this nvcc: Blackwell (RTX 50) = 120 (CUDA 12.8+), else 90 (Hopper; note Ada Lovelace is 89, not 90).
if [ -n "${CMAKE_CUDA_ARCHITECTURES:-}" ]; then
CUDA_ARCH="$CMAKE_CUDA_ARCHITECTURES"
echo " Using CUDA architectures: $CUDA_ARCH (from CMAKE_CUDA_ARCHITECTURES)"
else
warn "No CUDA — building CPU-only whisper.cpp"
NVCC_VER=""
if command -v nvcc &>/dev/null; then
NVCC_VER=$(nvcc --version 2>/dev/null | sed -n 's/.*release \([0-9]*\)\.\([0-9]*\).*/\1.\2/p' | head -1)
fi
# Blackwell (sm_120) requires CUDA 12.8+.
# sort -V orders the two versions; NVCC_VER sorting last means NVCC_VER >= 12.8.
if [ -n "$NVCC_VER" ] && [ "$(printf '%s\n' "12.8" "$NVCC_VER" | sort -V | tail -1)" = "$NVCC_VER" ]; then
  CUDA_ARCH="120"
  echo " Using CUDA architecture 120 (Blackwell / RTX 50) - nvcc $NVCC_VER"
else
  # Compute capability 9.0 is Hopper; Ada Lovelace is 8.9. The value stays at 90,
  # only the label is corrected.
  # NOTE(review): a build targeting 90 is not expected to run on GPUs older than
  # Hopper (including Ada) - confirm the intended fallback target.
  CUDA_ARCH="90"
  echo " Using CUDA architecture 90 (Hopper) - nvcc ${NVCC_VER:-unknown} (use CUDA 12.8+ for Blackwell 120)"
fi
fi
cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" 2>/dev/null || cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}"
else
warn "No CUDA - building CPU-only whisper.cpp"
cmake -B build
fi
@ -198,18 +269,23 @@ if [ ! -f "$WHISPER_MODEL_FILE" ]; then
ok "Whisper model downloaded"
fi
# ── 6. TTS Setup ─────────────────────────────────────────────
# -- 6. TTS Setup ------------------------------------------------
step "Step 6/7: TTS Setup (Orpheus + Qwen3-TTS)"
cd "$LLMS_DIR"
# Ensure shell scripts have LF line endings (Windows clone may have CRLF)
for f in setup-tts.sh start-dashboard.sh download-tts-models.sh; do
[ -f "$f" ] && sed -i 's/\r$//' "$f" 2>/dev/null || true
done
# Use huggingface.co directly (personal machine, no corporate proxy)
echo " Running setup-tts.sh (this may take several minutes on first run)..."
HF_MIRROR=https://huggingface.co bash setup-tts.sh
ok "TTS setup complete"
# ── 7. Start Dashboard ───────────────────────────────────────
# -- 7. Start Dashboard ------------------------------------------
step "Step 7/7: Mission Control Dashboard"
cd "$LLMS_DIR"
@ -229,7 +305,7 @@ else
bash start-dashboard.sh
fi
# ── Summary ──────────────────────────────────────────────────
# -- Summary -----------------------------------------------------
echo ""
echo -e " ${GREEN}=====================================${NC}"
echo -e " ${GREEN}WSL2 Setup Complete!${NC}"