fix(local-llms): harden WSL setup and dashboard Ollama connectivity

Stabilize Windows+WSL setup by fixing script line-ending pitfalls, WSL Ollama host detection, and dashboard startup behavior so models are detected reliably in Mission Control.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-22 16:44:56 -08:00 · 2026-02-22 16:44:56 -08:00 · 112341e162
commit 112341e162
parent 6dca1bd6f1
8 changed files with 857 additions and 659 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,2 @@
+# Bash scripts must use LF so they run in WSL/Linux
+*.sh text eol=lf
--- a/__LOCAL_LLMs/dashboard/package-lock.json
+++ b/__LOCAL_LLMs/dashboard/package-lock.json
--- a/__LOCAL_LLMs/dashboard/src/app/lib/ollama-config.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/lib/ollama-config.ts
@ -1 +1,39 @@
-export const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';
+import { execSync } from 'child_process';
+import fs from 'fs';
+
+/**
+ * Normalizes a user-supplied Ollama address into a base URL.
+ *
+ * Surrounding whitespace and trailing slashes are removed. If the value has
+ * no `http://` or `https://` scheme, `http://` is prepended. The scheme is
+ * matched case-insensitively, so `HTTP://host` is returned as-is instead of
+ * becoming `http://HTTP://host`.
+ *
+ * @param input - Raw address, e.g. `localhost:11434` or `http://host:11434/`.
+ * @returns The address with a scheme and without trailing slashes.
+ */
+function normalizeUrl(input: string): string {
+  const trimmed = input.trim().replace(/\/+$/, '');
+  // URL schemes are case-insensitive, so accept HTTP:// and HTTPS:// too.
+  if (/^https?:\/\//i.test(trimmed)) {
+    return trimmed;
+  }
+  return `http://${trimmed}`;
+}
+
+/**
+ * Infers the Ollama URL of the Windows host when running inside WSL.
+ *
+ * Returns `null` unless the platform is Linux and `/proc/version` mentions
+ * "microsoft". Otherwise the default route is read with `ip route` and its
+ * gateway address is used as the Ollama host on port 11434.
+ *
+ * @returns `http://<gateway>:11434`, or `null` when not on WSL, when no IPv4
+ *   gateway can be determined, or when any step fails.
+ */
+function detectWslGatewayOllamaUrl(): string | null {
+  try {
+    if (process.platform !== 'linux') return null;
+    const version = fs.readFileSync('/proc/version', 'utf-8').toLowerCase();
+    if (!version.includes('microsoft')) return null;
+
+    // This runs synchronously at module load, so bound it: a stuck `ip`
+    // must not block the importing module forever. A timeout throws and is
+    // handled by the catch below.
+    const routes = execSync('ip route show default', {
+      encoding: 'utf-8',
+      stdio: ['ignore', 'pipe', 'ignore'],
+      timeout: 2000,
+    });
+    // The first route line looks like "default via <gateway> dev eth0 ...";
+    // the gateway is the third whitespace-separated field.
+    const firstLine = routes.split('\n')[0] ?? '';
+    const gw = firstLine.trim().split(/\s+/)[2] ?? '';
+    // A route without "via" (e.g. "default dev eth0") puts a non-address in
+    // that field, so only accept something shaped like an IPv4 address.
+    if (!/^\d{1,3}(\.\d{1,3}){3}$/.test(gw)) return null;
+    return `http://${gw}:11434`;
+  } catch {
+    return null;
+  }
+}
+
+// Explicit configuration. Each value is trimmed before the truthiness check so
+// a blank setting (e.g. OLLAMA_HOST=" ") counts as unset instead of being
+// normalized to the unusable URL "http://".
+const explicit =
+  process.env.OLLAMA_URL?.trim() || process.env.OLLAMA_HOST?.trim() || '';
+
+// Priority:
+// 1) OLLAMA_URL / OLLAMA_HOST (explicit config)
+// 2) WSL2 gateway (Windows-hosted Ollama)
+// 3) localhost default
+//
+// Gateway detection spawns a subprocess, so it only runs when no explicit
+// value is configured.
+export const OLLAMA_URL = explicit
+  ? normalizeUrl(explicit)
+  : detectWslGatewayOllamaUrl() || 'http://localhost:11434';
--- a/__LOCAL_LLMs/setup-tts.sh
+++ b/__LOCAL_LLMs/setup-tts.sh
@ -1,6 +1,6 @@
 #!/bin/bash
 # ============================================================
-# TTS Setup — One-Shot Script for Fresh Laptop
+# TTS Setup - One-Shot Script for Fresh Laptop
 #
 # Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
 # on macOS (Apple Silicon) or Linux (CUDA GPU / WSL2).
@ -18,6 +18,7 @@
 #
 # Usage:
 #   bash setup-tts.sh
+#   From WSL with Ollama on Windows: OLLAMA_HOST=http://<Windows_IP>:11434 bash setup-tts.sh
 #
 # After setup, test with:
 #   .venv-qwen-tts/bin/python test_orpheus_tts.py
@ -32,6 +33,21 @@ MODELS_DIR="$SCRIPT_DIR/models"
 # On personal machines, set HF_MIRROR=https://huggingface.co to download directly
 HF_MIRROR="${HF_MIRROR:-https://hf-mirror.com}"

+# Ollama API base URL (for WSL2 with Ollama on Windows, set OLLAMA_HOST=http://<Windows_IP>:11434)
+OLLAMA_BASE="${OLLAMA_HOST:-http://localhost:11434}"
+OLLAMA_BASE="${OLLAMA_BASE%/}"
+# WSL2: if localhost fails, try Windows host via default gateway (common when Ollama runs on Windows)
+if [ "$OLLAMA_BASE" = "http://localhost:11434" ] && ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
+    if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
+        WIN_HOST=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
+        if [ -n "$WIN_HOST" ] && curl -s --max-time 2 "http://${WIN_HOST}:11434/api/tags" &>/dev/null; then
+            OLLAMA_BASE="http://${WIN_HOST}:11434"
+            export OLLAMA_HOST="${OLLAMA_BASE}"
+        fi
+    fi
+fi
+export OLLAMA_HOST="${OLLAMA_HOST:-$OLLAMA_BASE}"
+
 # Detect OS
 OS_TYPE="$(uname -s)"
 IS_MAC=false
@ -42,18 +58,18 @@ GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m'

-ok()   { echo -e "${GREEN}✓${NC} $1"; }
-warn() { echo -e "${YELLOW}⚠${NC} $1"; }
-fail() { echo -e "${RED}✗${NC} $1"; exit 1; }
+ok()   { echo -e "${GREEN}[OK]${NC} $1"; }
+warn() { echo -e "${YELLOW}[!!]${NC} $1"; }
+fail() { echo -e "${RED}[FAIL]${NC} $1"; exit 1; }
 step() { echo -e "\n${GREEN}=== $1 ===${NC}"; }

-echo "╔══════════════════════════════════════════════╗"
-echo "║       TTS Setup — Local Speech Generation    ║"
-echo "║  Orpheus TTS (Ollama) + Qwen3-TTS (Python)  ║"
-echo "╚══════════════════════════════════════════════╝"
+echo "+------------------------------------------------------+"
+echo "|       TTS Setup - Local Speech Generation            |"
+echo "|  Orpheus TTS (Ollama) + Qwen3-TTS (Python)           |"
+echo "+------------------------------------------------------+"
 echo ""

-# ── 0. Check prerequisites ──────────────────────────────────
+# -- 0. Check prerequisites -----------------------------------
 step "Checking prerequisites"

 if $IS_MAC; then
@ -69,29 +85,29 @@ if $IS_MAC; then
        brew install ollama
    fi
 else
-    # Linux / WSL2 — Ollama should be installed on host or via install script
+    # Linux / WSL2 - Ollama should be installed on host or via install script
    if ! command -v ollama &>/dev/null; then
        # On WSL2 Ollama runs on the Windows side; check if reachable
-        if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
-            fail "Ollama not found and not reachable at localhost:11434. Install Ollama on Windows or run: curl -fsSL https://ollama.com/install.sh | sh"
+        if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
+            fail "Ollama not reachable at $OLLAMA_BASE. From WSL set OLLAMA_HOST=http://<Windows_IP>:11434 (e.g. from /etc/resolv.conf nameserver)."
        fi
-        ok "Ollama reachable at localhost:11434 (Windows host)"
+        ok "Ollama reachable at $OLLAMA_BASE (Windows host)"
    fi
 fi
 ok "Ollama installed"

 # Check if Ollama is running
-if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
-    warn "Ollama not running. Starting..."
+if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
+    warn "Ollama not running at $OLLAMA_BASE. Starting..."
    if command -v ollama &>/dev/null; then
        ollama serve &>/dev/null &
        sleep 3
    fi
-    if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
-        fail "Could not start Ollama. Try manually: ollama serve"
+    if ! curl -s --max-time 2 "$OLLAMA_BASE/api/tags" &>/dev/null; then
+        fail "Could not start Ollama. Try manually: ollama serve (or from WSL set OLLAMA_HOST=http://<Windows_IP>:11434)"
    fi
 fi
-ok "Ollama running on port 11434"
+ok "Ollama running at $OLLAMA_BASE"

 # GPU check
 ARCH=$(uname -m)
@ -99,18 +115,18 @@ if $IS_MAC; then
    if [ "$ARCH" != "arm64" ]; then
        warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
    else
-        ok "Apple Silicon ($ARCH) — MPS acceleration available"
+        ok "Apple Silicon ($ARCH) - MPS acceleration available"
    fi
 else
    if command -v nvidia-smi &>/dev/null; then
        GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
-        ok "NVIDIA GPU detected: $GPU_NAME — CUDA acceleration available"
+        ok "NVIDIA GPU detected: $GPU_NAME - CUDA acceleration available"
    else
        warn "nvidia-smi not found. CUDA acceleration won't be available (CPU fallback)."
    fi
 fi

-# ── 1. Install Python 3.12 ──────────────────────────────────
+# -- 1. Install Python 3.12 -----------------------------------
 step "Python 3.12"

 PYTHON_CMD=""
@ -137,7 +153,7 @@ fi
 PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
 ok "$PYTHON_VER at $PYTHON_CMD"

-# ── 2. Create venv ──────────────────────────────────────────
+# -- 2. Create venv --------------------------------------------
 step "Python virtual environment"

 if [ -f "$VENV/bin/python" ]; then
@ -148,7 +164,7 @@ else
    ok "Venv created at $VENV"
 fi

-# ── 3. Install Python packages ──────────────────────────────
+# -- 3. Install Python packages --------------------------------
 step "Python packages"

 # Check if snac is installed (quick proxy for all packages)
@ -169,18 +185,38 @@ else
    ok "Packages installed"
 fi

-# ── 4. Pull Orpheus TTS model ───────────────────────────────
+# -- 4. Pull Orpheus TTS model ---------------------------------
 step "Orpheus TTS model (Ollama)"

-if ollama list 2>/dev/null | grep -q "orpheus"; then
+# List the models known to Ollama.
+# Uses the ollama CLI if available, otherwise the HTTP API (WSL2 where the CLI
+# is on Windows): queries $OLLAMA_BASE/api/tags and prints one model name per line.
+# Prints nothing if the server is unreachable or the response cannot be parsed.
+ollama_list() {
+    if command -v ollama &>/dev/null; then
+        ollama list 2>/dev/null
+    else
+        # Bound the request like the other connectivity probes in this script so
+        # an unresponsive host cannot stall setup indefinitely.
+        curl -s --max-time 10 "$OLLAMA_BASE/api/tags" 2>/dev/null | python3 -c "import sys,json; [print(m['name']) for m in json.load(sys.stdin).get('models',[])]" 2>/dev/null
+    fi
+}
+
+# Pull a model into Ollama.
+#   $1 - model name (e.g. sematre/orpheus:en)
+# Uses the ollama CLI if available, otherwise POSTs to $OLLAMA_BASE/api/pull.
+# Returns non-zero when the pull fails so callers can tell failure from success.
+# The API request is limited to OLLAMA_PULL_TIMEOUT seconds (default 3600).
+ollama_pull() {
+    local model="$1"
+    if command -v ollama &>/dev/null; then
+        # Bypass any configured HTTP proxy for the Ollama registry hosts.
+        NO_PROXY="ollama.com,registry.ollama.ai" ollama pull "$model"
+    else
+        echo "  (Using Ollama API at $OLLAMA_BASE to pull model...)"
+        local status=0
+        # -f makes HTTP error responses a non-zero exit (plain -s would report
+        # them as success); -S still prints the reason. With "stream":false the
+        # request blocks until the download finishes, hence the long timeout.
+        curl -fsS -X POST "$OLLAMA_BASE/api/pull" -d "{\"name\":\"$model\",\"stream\":false}" --max-time "${OLLAMA_PULL_TIMEOUT:-3600}" || status=$?
+        echo ""
+        return "$status"
+    fi
+}
+
+if ollama_list | grep -q "orpheus"; then
    ok "Orpheus TTS already downloaded"
 else
    echo "Pulling sematre/orpheus:en (4 GB)..."
-    NO_PROXY="ollama.com,registry.ollama.ai" ollama pull sematre/orpheus:en
+    ollama_pull "sematre/orpheus:en"
    ok "Orpheus TTS downloaded"
 fi

-# ── 5. Download SNAC decoder ────────────────────────────────
+# -- 5. Download SNAC decoder ----------------------------------
 step "SNAC 24kHz audio decoder (~76 MB)"

 mkdir -p "$MODELS_DIR/snac_24khz"
@ -235,7 +271,7 @@ else
    fail "SNAC decoder failed to load. Delete models/snac_24khz/ and re-run."
 fi

-# ── 6. (Optional) Download Qwen3-TTS ────────────────────────
+# -- 6. (Optional) Download Qwen3-TTS --------------------------
 step "Qwen3-TTS 0.6B (optional, ~1.7 GB total)"

 QWEN_TOKENIZER_DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
@ -245,7 +281,7 @@ if [ -d "$QWEN_MODEL_DIR" ] && [ -f "$QWEN_MODEL_DIR/config.json" ]; then
    ok "Qwen3-TTS already downloaded"
 else
    echo "Qwen3-TTS 0.6B requires ~1.7 GB download (tokenizer + model)."
-    echo "This is optional — Orpheus TTS (above) works without it."
+    echo "This is optional - Orpheus TTS (above) works without it."
    read -p "Download Qwen3-TTS? [y/N] " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
@ -275,17 +311,17 @@ else
    fi
 fi

-# ── Summary ──────────────────────────────────────────────────
+# -- Summary ---------------------------------------------------
 step "Setup Complete"

 echo ""
 echo "Installed components:"
-echo "  Orpheus TTS (Ollama):  $(ollama list 2>/dev/null | grep orpheus | awk '{print $NF}' || echo 'ready')"
+# grep -m1 both limits output to the first match and exits non-zero when nothing
+# matches, so the 'ready' fallback can actually fire (a trailing `head -1` always exits 0).
+echo "  Orpheus TTS (Ollama):  $(ollama_list 2>/dev/null | grep -m1 orpheus || echo 'ready')"
 echo "  SNAC decoder:          $MODELS_DIR/snac_24khz/"
 if [ -d "$QWEN_MODEL_DIR" ] && [ -f "$QWEN_MODEL_DIR/config.json" ]; then
    echo "  Qwen3-TTS 0.6B:       $QWEN_MODEL_DIR/"
 else
-    echo "  Qwen3-TTS 0.6B:       (not installed — re-run setup to add)"
+    echo "  Qwen3-TTS 0.6B:       (not installed - re-run setup to add)"
 fi
 echo ""
 echo "Disk usage:"
--- a/__LOCAL_LLMs/start-dashboard.sh
+++ b/__LOCAL_LLMs/start-dashboard.sh
@ -11,15 +11,27 @@
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 DASHBOARD_DIR="$SCRIPT_DIR/dashboard"
 PORT=3000
-OLLAMA_URL="http://localhost:11434"
+
+# Ollama URL: respect OLLAMA_HOST env var, auto-detect WSL2 gateway if needed
+OLLAMA_URL="${OLLAMA_HOST:-http://localhost:11434}"
+OLLAMA_URL="${OLLAMA_URL%/}"
+if ! curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
+    if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
+        GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
+        if [ -n "$GW" ] && curl -s --max-time 2 "http://${GW}:11434/api/tags" &>/dev/null; then
+            OLLAMA_URL="http://${GW}:11434"
+        fi
+    fi
+fi
+export OLLAMA_HOST="$OLLAMA_URL"

 GREEN='\033[0;32m'
 RED='\033[0;31m'
 YELLOW='\033[1;33m'
 NC='\033[0m'
-ok()   { echo -e "${GREEN}✓${NC} $1"; }
-warn() { echo -e "${YELLOW}⚠${NC} $1"; }
-fail() { echo -e "${RED}✗${NC} $1"; }
+ok()   { echo -e "${GREEN}[OK]${NC} $1"; }
+warn() { echo -e "${YELLOW}[!!]${NC} $1"; }
+fail() { echo -e "${RED}[FAIL]${NC} $1"; }

 case "${1:-start}" in
    stop)
@ -39,9 +51,9 @@ case "${1:-start}" in
        # Ollama
        if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
            MODELS=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
-            ok "Ollama running ($MODELS models)"
+            ok "Ollama running at $OLLAMA_URL ($MODELS models)"
        else
-            fail "Ollama not running"
+            fail "Ollama not running at $OLLAMA_URL"
        fi
        # Dashboard
        if curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
@ -56,17 +68,21 @@ case "${1:-start}" in
        echo "=== Starting Mission Control ==="
        echo ""

-        # 1. Ensure Ollama is running
+        # 1. Check Ollama connectivity (don't try to start it -- on WSL2 it runs on Windows)
        if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
-            ok "Ollama already running"
+            ok "Ollama running at $OLLAMA_URL"
        else
+            if command -v ollama &>/dev/null; then
                echo "Starting Ollama..."
                ollama serve &>/dev/null &
                sleep 2
                if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
                    ok "Ollama started"
                else
-                fail "Could not start Ollama. Try: ollama serve"
+                    warn "Could not start Ollama. It may be running on Windows already."
+                fi
+            else
+                warn "Ollama not reachable at $OLLAMA_URL (on WSL2, ensure it is running on Windows with OLLAMA_HOST=0.0.0.0:11434)"
            fi
        fi

@ -83,12 +99,14 @@ case "${1:-start}" in
            ok "Dependencies installed"
        fi

-        # 4. Start dashboard
+        # 4. Start dashboard (use node directly -- npx/next may lack +x on /mnt/ NTFS)
        echo "Starting dashboard on port $PORT..."
-        (cd "$DASHBOARD_DIR" && npm run dev &>/dev/null &)
+        LOGFILE="$DASHBOARD_DIR/.next-dev.log"
+        (cd "$DASHBOARD_DIR" && OLLAMA_HOST="$OLLAMA_URL" node node_modules/next/dist/bin/next dev > "$LOGFILE" 2>&1 &)

-        # Wait for it to be ready
-        for i in $(seq 1 15); do
+        # Wait for it to be ready (Next.js on /mnt/d can be slow to compile)
+        echo "  Waiting for Next.js to compile (this can take 1-2 min on /mnt/d)..."
+        for i in $(seq 1 90); do
            if curl -s --max-time 1 "http://localhost:$PORT" &>/dev/null; then
                ok "Dashboard ready at http://localhost:$PORT"
                echo ""
@ -99,7 +117,11 @@ case "${1:-start}" in
            sleep 1
        done

-        fail "Dashboard did not start within 15s. Check: cd dashboard && npm run dev"
+        warn "Dashboard did not respond within 90s."
+        echo "  It may still be compiling. Check the log:"
+        echo "    tail -f $LOGFILE"
+        echo "  Or start manually:"
+        echo "    cd $DASHBOARD_DIR && npx next dev"
        exit 1
        ;;

--- a/__LOCAL_LLMs/windows_specific/run-tts-setup.sh
+++ b/__LOCAL_LLMs/windows_specific/run-tts-setup.sh
@ -0,0 +1,23 @@
+#!/bin/bash
+# Wrapper: fix line endings, detect Windows host IP, run TTS setup
+#
+# Usage:
+#   bash run-tts-setup.sh
+#   OLLAMA_HOST=http://<Windows_IP>:11434 bash run-tts-setup.sh   # skip auto-detection
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+LLMS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# Fix CRLF (best effort; errors are ignored and setup-tts.sh is run regardless)
+sed -i 's/\r$//' "$LLMS_DIR/setup-tts.sh" 2>/dev/null
+
+if [ -n "${OLLAMA_HOST:-}" ]; then
+    # An explicit OLLAMA_HOST from the caller wins over auto-detection.
+    export OLLAMA_HOST
+    echo "Using OLLAMA_HOST=$OLLAMA_HOST (from environment)"
+else
+    # Detect Windows host IP via default gateway
+    GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
+    if [ -n "$GW" ] && curl -s --max-time 2 "http://${GW}:11434/api/tags" >/dev/null 2>&1; then
+        export OLLAMA_HOST="http://${GW}:11434"
+        echo "Ollama found at $OLLAMA_HOST (via default gateway)"
+    elif curl -s --max-time 2 "http://localhost:11434/api/tags" >/dev/null 2>&1; then
+        export OLLAMA_HOST="http://localhost:11434"
+        echo "Ollama found at $OLLAMA_HOST"
+    else
+        echo "WARNING: Ollama not found. The script may fail at prerequisite check."
+    fi
+fi
+
+export HF_MIRROR="${HF_MIRROR:-https://huggingface.co}"
+# Stop here if the directory is missing rather than exec'ing a setup-tts.sh
+# from whatever the current directory happens to be.
+cd "$LLMS_DIR" || { echo "ERROR: cannot cd to $LLMS_DIR" >&2; exit 1; }
+exec bash setup-tts.sh
--- a/__LOCAL_LLMs/windows_specific/setup-windows.ps1
+++ b/__LOCAL_LLMs/windows_specific/setup-windows.ps1
@ -1,5 +1,5 @@
 # ============================================================
-# Windows-Side Setup — Local LLM Stack (Razer Blade 18)
+# Windows-Side Setup - Local LLM Stack (Razer Blade 18)
 #
 # Run this FIRST from PowerShell (Admin) on a fresh Windows machine.
 # After this completes, open WSL2 and run setup-wsl.sh.
@ -27,12 +27,12 @@ function Write-Step  { param($msg) Write-Host "`n=== $msg ===" -ForegroundColor

 Write-Host ""
 Write-Host "  =====================================" -ForegroundColor Cyan
-Write-Host "  Local LLM Stack — Windows Setup" -ForegroundColor Cyan
+Write-Host "  Local LLM Stack - Windows Setup" -ForegroundColor Cyan
 Write-Host "  Razer Blade 18 / RTX 5090" -ForegroundColor Cyan
 Write-Host "  =====================================" -ForegroundColor Cyan
 Write-Host ""

-# ── 1. Check NVIDIA Drivers ──────────────────────────────────
+# -- 1. Check NVIDIA Drivers ----------------------------------
 Write-Step "Step 1/4: NVIDIA Drivers + CUDA"

 $nvidiaSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
@ -53,7 +53,7 @@ if ($nvidiaSmi) {
    if ($continue -ne "y") { exit 1 }
 }

-# ── 2. Install Ollama ────────────────────────────────────────
+# -- 2. Install Ollama -----------------------------------------
 Write-Step "Step 2/4: Ollama"

 $ollamaCmd = Get-Command ollama -ErrorAction SilentlyContinue
@ -93,9 +93,9 @@ try {
    }
 }

-# ── 3. Pull Models ───────────────────────────────────────────
+# -- 3. Pull Models --------------------------------------------
 Write-Step "Step 3/4: Pull Ollama Models"
-
+# Curated for coding + reasoning + TTS; update from https://ollama.com/models as needed
 $models = @(
    @{ name = "qwen2.5-coder:32b";  desc = "19 GB - primary coding model" },
    @{ name = "qwen2.5-coder:7b";   desc = "4.7 GB - fast coding" },
@ -117,14 +117,14 @@ try {
 foreach ($model in $models) {
    $alreadyPulled = $existingModels | Where-Object { $_ -like "*$($model.name)*" }
    if ($alreadyPulled) {
-        Write-OK "$($model.name) — already pulled"
+        Write-OK "$($model.name) - already pulled"
    } else {
        Write-Host "  Pulling $($model.name) ($($model.desc))..."
        & ollama pull $model.name
        if ($LASTEXITCODE -eq 0) {
-            Write-OK "$($model.name) — pulled"
+            Write-OK "$($model.name) - pulled"
        } else {
-            Write-Warn "$($model.name) — pull failed (you can retry later: ollama pull $($model.name))"
+            Write-Warn "$($model.name) - pull failed (you can retry later: ollama pull $($model.name))"
        }
    }
 }
@ -134,7 +134,7 @@ Write-Host ""
 Write-Host "  Installed models:"
 & ollama list

-# ── 4. Install WSL2 ──────────────────────────────────────────
+# -- 4. Install WSL2 -------------------------------------------
 Write-Step "Step 4/4: WSL2 (Ubuntu 24.04)"

 $wslInstalled = $false
@ -158,19 +158,19 @@ if (-not $wslInstalled) {
    Write-Warn "After reboot, Ubuntu will ask you to set up a username/password."
 }

-# ── Summary ──────────────────────────────────────────────────
-Write-Host ""
-Write-Host "  =====================================" -ForegroundColor Green
-Write-Host "  Windows-Side Setup Complete!" -ForegroundColor Green
-Write-Host "  =====================================" -ForegroundColor Green
-Write-Host ""
-Write-Host "  Next steps:" -ForegroundColor Yellow
-Write-Host "  1. If WSL2 was just installed, reboot and set up Ubuntu username/password"
-Write-Host "  2. Open Ubuntu (WSL2) terminal"
-Write-Host "  3. Run the WSL2 setup script:"
-Write-Host ""
-Write-Host "     curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash" -ForegroundColor Cyan
-Write-Host ""
-Write-Host "     Or if you already cloned the repo:"
-Write-Host "     bash ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs/windows_specific/setup-wsl.sh" -ForegroundColor Cyan
-Write-Host ""
+# -- Summary ---------------------------------------------------
+Write-Host ''
+Write-Host '  =====================================' -ForegroundColor Green
+Write-Host '  Windows-Side Setup Complete!' -ForegroundColor Green
+Write-Host '  =====================================' -ForegroundColor Green
+Write-Host ''
+Write-Host '  Next steps:' -ForegroundColor Yellow
+Write-Host '  1. If WSL2 was just installed, reboot and set up Ubuntu username/password'
+Write-Host '  2. Open Ubuntu (WSL2) terminal'
+Write-Host '  3. Run the WSL2 setup script:'
+Write-Host ''
+Write-Host '     curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash' -ForegroundColor Cyan
+Write-Host ''
+Write-Host '     Or if you already cloned the repo:'
+Write-Host '     bash ~/code/mygh/learning_ai_common_plat/__LOCAL_LLMs/windows_specific/setup-wsl.sh' -ForegroundColor Cyan
+Write-Host ''
--- a/__LOCAL_LLMs/windows_specific/setup-wsl.sh
+++ b/__LOCAL_LLMs/windows_specific/setup-wsl.sh
@ -1,15 +1,15 @@
 #!/bin/bash
 # ============================================================
-# WSL2-Side Setup — Local LLM Stack (Razer Blade 18)
+# WSL2-Side Setup - Local LLM Stack (Razer Blade 18)
 #
 # Run this INSIDE WSL2 (Ubuntu 24.04) after setup-windows.ps1.
-# This is a one-shot script — safe to re-run (idempotent).
+# This is a one-shot script - safe to re-run (idempotent).
 #
 # What this does:
 #   1. Installs system deps (Node.js 20, Python 3.12, ffmpeg, cmake)
 #   2. Verifies NVIDIA GPU passthrough (CUDA)
 #   3. Clones the repo (if not already cloned)
-#   4. Builds whisper.cpp with CUDA
+#   4. Builds whisper.cpp with CUDA (auto-detects 120 for Blackwell/RTX 50 when nvcc 12.8+)
 #   5. Downloads Whisper large-v3-turbo model
 #   6. Runs setup-tts.sh (Orpheus + Qwen3-TTS)
 #   7. Starts the Mission Control dashboard
@ -17,6 +17,9 @@
 # Usage:
 #   bash setup-wsl.sh
 #
+# When run from inside the repo (e.g. script path under __LOCAL_LLMs/), uses
+# that repo and skips clone. Or set EXISTING_REPO_PATH=/path/to/repo to use it.
+#
 # Or one-liner from a fresh WSL2 terminal:
 #   curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash
 # ============================================================
@ -35,6 +38,15 @@ step() { echo -e "\n${CYAN}=== $1 ===${NC}"; }

 REPO_URL="https://github.com/saravanakumardb1/learning_ai_common_plat.git"
 REPO_DIR="$HOME/code/mygh/learning_ai_common_plat"
+
+# Use existing repo if run from repo path or EXISTING_REPO_PATH is set
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+if [ -n "${EXISTING_REPO_PATH:-}" ]; then
+    REPO_DIR="$(cd "$EXISTING_REPO_PATH" && pwd)"
+elif [ -d "$SCRIPT_DIR/../.." ] && [ -d "$SCRIPT_DIR/../../__LOCAL_LLMs" ]; then
+    REPO_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+fi
+
 LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
 WHISPER_DIR="$HOME/whisper-cpp"
 WHISPER_MODEL_DIR="$HOME/whisper-models"
@ -42,12 +54,12 @@ WHISPER_MODEL_FILE="$WHISPER_MODEL_DIR/ggml-large-v3-turbo.bin"

 echo ""
 echo -e "  ${CYAN}=====================================${NC}"
-echo -e "  ${CYAN}Local LLM Stack — WSL2 Setup${NC}"
+echo -e "  ${CYAN}Local LLM Stack - WSL2 Setup${NC}"
 echo -e "  ${CYAN}Ubuntu 24.04 + CUDA${NC}"
 echo -e "  ${CYAN}=====================================${NC}"
 echo ""

-# ── 1. System Dependencies ───────────────────────────────────
+# -- 1. System Dependencies -----------------------------------
 step "Step 1/7: System Dependencies"

 sudo apt update -qq
@ -92,7 +104,7 @@ if [ -n "$PKGS_TO_INSTALL" ]; then
 fi
 ok "ffmpeg, git, curl, build-essential, cmake"

-# ── 2. NVIDIA GPU Passthrough ────────────────────────────────
+# -- 2. NVIDIA GPU Passthrough ---------------------------------
 step "Step 2/7: NVIDIA GPU (CUDA passthrough)"

 if command -v nvidia-smi &>/dev/null; then
@ -111,15 +123,28 @@ else
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
 fi

-# ── 3. Verify Ollama (Windows-side) ──────────────────────────
+# -- 3. Verify Ollama (Windows-side) ---------------------------
 step "Step 3/7: Ollama Connectivity"

-if curl -s --max-time 3 http://localhost:11434/api/tags &>/dev/null; then
-    MODEL_COUNT=$(curl -s http://localhost:11434/api/tags | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
-    ok "Ollama reachable at localhost:11434 ($MODEL_COUNT models)"
+OLLAMA_URL="${OLLAMA_HOST:-http://localhost:11434}"
+OLLAMA_URL="${OLLAMA_URL%/}"
+# WSL2: if localhost fails, try Windows host via default gateway
+if ! curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
+    if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
+        GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
+        if [ -n "$GW" ] && curl -s --max-time 3 "http://${GW}:11434/api/tags" &>/dev/null; then
+            OLLAMA_URL="http://${GW}:11434"
+        fi
+    fi
+fi
+export OLLAMA_HOST="$OLLAMA_URL"
+
+if curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
+    MODEL_COUNT=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
+    ok "Ollama reachable at $OLLAMA_URL ($MODEL_COUNT models)"
 else
-    warn "Ollama not reachable at localhost:11434"
-    echo "  Make sure Ollama is running on the Windows side."
+    warn "Ollama not reachable at $OLLAMA_URL"
+    echo "  Make sure Ollama is running on Windows with OLLAMA_HOST=0.0.0.0:11434"
    echo "  Open PowerShell and run: ollama serve"
    echo ""
    read -p "  Continue anyway? (y/N) " -n 1 -r
@ -127,13 +152,41 @@ else
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
 fi

-# ── 4. Clone Repo ────────────────────────────────────────────
+# -- 4. Clone Repo ----------------------------------------------
 step "Step 4/7: Clone Repository"

+DEFAULT_REPO_DIR="$HOME/code/mygh/learning_ai_common_plat"
+# Performance: /mnt/* is slower for node_modules. Keep setup fast by default.
+# Optional: set MIRROR_TO_WSL_LOCAL=1 to copy repo to $DEFAULT_REPO_DIR.
+if [[ "$REPO_DIR" == /mnt/* ]]; then
+    warn "Repo is on Windows mount: $REPO_DIR"
+    echo "  This works, but dashboard/node_modules can be slower on /mnt/*."
+    if [ -d "$DEFAULT_REPO_DIR/__LOCAL_LLMs" ]; then
+        # A WSL-local checkout already exists: prefer it over the mounted one.
+        REPO_DIR="$DEFAULT_REPO_DIR"
+        LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
+        ok "Using existing WSL-local repo at $REPO_DIR"
+    elif [ "${MIRROR_TO_WSL_LOCAL:-0}" = "1" ] && [ -d "$REPO_DIR/.git" ]; then
+        echo "  MIRROR_TO_WSL_LOCAL=1 -> copying repo to:"
+        echo "    $DEFAULT_REPO_DIR"
+        # Copy the repo's contents ("/.") into the target directory. Copying the
+        # directory itself would nest it one level deeper whenever the target
+        # already exists (e.g. left over from an earlier attempt).
+        mkdir -p "$DEFAULT_REPO_DIR"
+        if cp -a "$REPO_DIR/." "$DEFAULT_REPO_DIR/"; then
+            REPO_DIR="$DEFAULT_REPO_DIR"
+            LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
+            ok "Copied repo to $REPO_DIR"
+        else
+            # Keep using the mounted repo rather than switching to a partial copy.
+            warn "Copy to $DEFAULT_REPO_DIR failed; continuing on $REPO_DIR"
+        fi
+    else
+        echo "  Continuing on current path for speed."
+        echo "  Tip: to mirror once into WSL-local FS, re-run with:"
+        echo "    MIRROR_TO_WSL_LOCAL=1 bash setup-wsl.sh"
+    fi
+fi
+
 if [ -d "$LLMS_DIR" ]; then
    ok "Repo already cloned at $REPO_DIR"
+    # Only pull when using the default clone path.
+    if [ "$REPO_DIR" = "$DEFAULT_REPO_DIR" ]; then
        echo "  Pulling latest..."
        (cd "$REPO_DIR" && git pull --ff-only origin main 2>/dev/null || true)
+    fi
 else
    echo "  Cloning into $REPO_DIR..."
    mkdir -p "$(dirname "$REPO_DIR")"
@ -147,7 +200,7 @@ if [ ! -d "$LLMS_DIR" ]; then
 fi
 ok "__LOCAL_LLMs directory: $LLMS_DIR"

-# ── 5. Build Whisper.cpp (CUDA) ──────────────────────────────
+# -- 5. Build Whisper.cpp (CUDA) ---------------------------------
 step "Step 5/7: Whisper.cpp (CUDA build)"

 if command -v whisper-cli &>/dev/null; then
@ -168,9 +221,27 @@ else
            echo "  Installing CUDA toolkit for compilation..."
            sudo apt install -y nvidia-cuda-toolkit -qq 2>/dev/null || true
        fi
-        cmake -B build -DGGML_CUDA=ON 2>/dev/null || cmake -B build
+        # Pick the CUDA arch for this nvcc: Blackwell (RTX 50) = 120 needs CUDA 12.8+; otherwise
+        # fall back to Ada (RTX 40) = 89. Compute capability 90 is Hopper, not Ada, and a build
+        # targeting only 90 cannot run on Ada GPUs.
+        # Set CMAKE_CUDA_ARCHITECTURES in the environment to override the auto-detection.
+        if [ -n "${CMAKE_CUDA_ARCHITECTURES:-}" ]; then
+            CUDA_ARCH="$CMAKE_CUDA_ARCHITECTURES"
+            echo "  Using CUDA architectures: $CUDA_ARCH (from CMAKE_CUDA_ARCHITECTURES)"
         else
-        warn "No CUDA — building CPU-only whisper.cpp"
+            NVCC_VER=""
+            if command -v nvcc &>/dev/null; then
+                # Extract "<major>.<minor>" from the "release X.Y" text in nvcc's version banner.
+                NVCC_VER=$(nvcc --version 2>/dev/null | sed -n 's/.*release \([0-9]*\)\.\([0-9]*\).*/\1.\2/p' | head -1)
+            fi
+            # Blackwell (sm_120) requires CUDA 12.8+
+            # (version sort: NVCC_VER >= 12.8 exactly when it sorts last of the two)
+            if [ -n "$NVCC_VER" ] && [ "$(printf '%s\n' "12.8" "$NVCC_VER" | sort -V | tail -1)" = "$NVCC_VER" ]; then
+                CUDA_ARCH="120"
+                echo "  Using CUDA architecture 120 (Blackwell / RTX 50) - nvcc $NVCC_VER"
+            else
+                CUDA_ARCH="89"
+                echo "  Using CUDA architecture 89 (Ada) - nvcc ${NVCC_VER:-unknown} (use CUDA 12.8+ for Blackwell 120)"
+            fi
+        fi
+        # First attempt is quiet; if it fails, re-run the same configure with stderr visible.
+        cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" 2>/dev/null || cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}"
+    else
+        warn "No CUDA - building CPU-only whisper.cpp"
        cmake -B build
    fi

@ -198,18 +269,23 @@ if [ ! -f "$WHISPER_MODEL_FILE" ]; then
    ok "Whisper model downloaded"
 fi

-# ── 6. TTS Setup ─────────────────────────────────────────────
+# -- 6. TTS Setup ------------------------------------------------
 step "Step 6/7: TTS Setup (Orpheus + Qwen3-TTS)"

 cd "$LLMS_DIR"

+# Ensure shell scripts have LF line endings (Windows clone may have CRLF)
+for f in setup-tts.sh start-dashboard.sh download-tts-models.sh; do
+    [ -f "$f" ] && sed -i 's/\r$//' "$f" 2>/dev/null || true
+done
+
 # Use huggingface.co directly (personal machine, no corporate proxy)
 echo "  Running setup-tts.sh (this may take several minutes on first run)..."
 HF_MIRROR=https://huggingface.co bash setup-tts.sh

 ok "TTS setup complete"

-# ── 7. Start Dashboard ───────────────────────────────────────
+# -- 7. Start Dashboard ------------------------------------------
 step "Step 7/7: Mission Control Dashboard"

 cd "$LLMS_DIR"
@ -229,7 +305,7 @@ else
    bash start-dashboard.sh
 fi

-# ── Summary ──────────────────────────────────────────────────
+# -- Summary -----------------------------------------------------
 echo ""
 echo -e "  ${GREEN}=====================================${NC}"
 echo -e "  ${GREEN}WSL2 Setup Complete!${NC}"