#!/bin/bash
# Repo path: learning_ai_common_plat/__LOCAL_LLMs/windows_specific/setup-wsl.sh
#
# ============================================================
# WSL2-Side Setup - Local LLM Stack (Razer Blade 18)
#
# Run this INSIDE WSL2 (Ubuntu 24.04) after setup-windows.ps1.
# This is a one-shot script - safe to re-run (idempotent).
#
# What this does:
#   1. Installs system deps (Node.js 20, Python 3.12, ffmpeg, cmake)
#   2. Verifies NVIDIA GPU passthrough (CUDA)
#   3. Checks that Ollama (running on the Windows side) is reachable
#   4. Clones the repo (if not already cloned)
#   5. Builds whisper.cpp with CUDA (auto-detects 120 for Blackwell/RTX 50 when nvcc 12.8+)
#      and downloads the Whisper large-v3-turbo model
#   6. Runs setup-tts.sh (Orpheus + Qwen3-TTS)
#   7. Starts the Mission Control dashboard
#
# Usage:
#   bash setup-wsl.sh
#
# When run from inside the repo (e.g. script path under __LOCAL_LLMs/), uses
# that repo and skips clone. Or set EXISTING_REPO_PATH=/path/to/repo to use it.
#
# Or one-liner from a fresh WSL2 terminal:
#   curl -fsSL https://raw.githubusercontent.com/saravanakumardb1/learning_ai_common_plat/main/__LOCAL_LLMs/windows_specific/setup-wsl.sh | bash
# ============================================================
# Stop at the first command that fails outside of a conditional context.
set -e

# ANSI color sequences used by the status helpers and banners. The
# backslashes are stored literally (single quotes) and only become escape
# characters when the text is printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
# NC is appended after colored text to switch the color back off.
NC='\033[0m'

# Status-line helpers. Each takes the message as $1 and prints one line to
# stdout; printf's %b expands backslash escapes (the color codes) the same
# way `echo -e` does.

# Green [OK] marker followed by the message.
ok() {
    printf '%b\n' "  ${GREEN}[OK]${NC} $1"
}

# Yellow [!!] marker followed by the message.
warn() {
    printf '%b\n' "  ${YELLOW}[!!]${NC} $1"
}

# Red [FAIL] marker followed by the message, then exit with status 1.
fail() {
    printf '%b\n' "  ${RED}[FAIL]${NC} $1"
    exit 1
}

# Cyan "=== title ===" section header, preceded by a blank line.
step() {
    printf '%b\n' "\n${CYAN}=== $1 ===${NC}"
}

# Clone source, and the checkout location used when no existing repo is found.
REPO_URL="https://github.com/saravanakumardb1/learning_ai_common_plat.git"
REPO_DIR="${HOME}/code/mygh/learning_ai_common_plat"

# whisper.cpp sources and model live under $HOME, independent of the repo.
WHISPER_DIR="${HOME}/whisper-cpp"
WHISPER_MODEL_DIR="${HOME}/whisper-models"
WHISPER_MODEL_FILE="${WHISPER_MODEL_DIR}/ggml-large-v3-turbo.bin"

# Directory containing this script. BASH_SOURCE[0] is empty when the script
# is read from stdin, in which case $0 is used instead.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"

# Repo resolution order: explicit EXISTING_REPO_PATH, then the repo this
# script sits in (two levels up must contain __LOCAL_LLMs), else the default.
if [[ -n "${EXISTING_REPO_PATH:-}" ]]; then
    REPO_DIR="$(cd "${EXISTING_REPO_PATH}" && pwd)"
elif [[ -d "${SCRIPT_DIR}/../.." && -d "${SCRIPT_DIR}/../../__LOCAL_LLMs" ]]; then
    REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
fi

LLMS_DIR="${REPO_DIR}/__LOCAL_LLMs"

# Opening banner: four cyan lines framed by blank lines. The single printf
# format is reused for every argument, so each one becomes its own line.
echo ""
printf '  %b\n' \
    "${CYAN}=====================================${NC}" \
    "${CYAN}Local LLM Stack - WSL2 Setup${NC}" \
    "${CYAN}Ubuntu 24.04 + CUDA${NC}" \
    "${CYAN}=====================================${NC}"
echo ""

# -- 1. System Dependencies -----------------------------------
step "Step 1/7: System Dependencies"

sudo apt update -qq

# Node.js 20 LTS (an already-installed v20 or v22 is accepted as-is).
# The version is read once; a failing `node --version` counts as "not installed".
NODE_VERSION=""
if command -v node &>/dev/null; then
    NODE_VERSION="$(node --version 2>/dev/null || true)"
fi

if [[ "$NODE_VERSION" == v20* || "$NODE_VERSION" == v22* ]]; then
    ok "Node.js $NODE_VERSION already installed"
else
    echo "  Installing Node.js 20 LTS..."
    # curl is only covered by the package check that runs after this
    # section, so make sure it exists before it is needed here.
    if ! command -v curl &>/dev/null; then
        sudo apt install -y curl -qq
    fi
    # Download the NodeSource setup script completely before running it.
    # Piping curl straight into bash would hide a failed download and let
    # apt silently install the distro's older nodejs instead.
    NODESOURCE_SETUP="$(mktemp)"
    if ! curl -fsSL https://deb.nodesource.com/setup_20.x -o "$NODESOURCE_SETUP"; then
        rm -f "$NODESOURCE_SETUP"
        fail "Could not download the NodeSource setup script"
    fi
    # stdin is detached so the setup script cannot consume this script's own
    # text when it is being piped into bash.
    if ! sudo -E bash "$NODESOURCE_SETUP" </dev/null 2>/dev/null; then
        rm -f "$NODESOURCE_SETUP"
        fail "NodeSource setup script failed"
    fi
    rm -f "$NODESOURCE_SETUP"
    sudo apt install -y nodejs -qq
    ok "Node.js $(node --version) installed"
fi

# Python 3.12
# Prefer an explicit python3.12 binary, otherwise accept whatever python3 is.
PYTHON_CMD=""
for cmd in python3.12 python3; do
    if command -v "$cmd" &>/dev/null; then
        PYTHON_CMD="$cmd"
        break
    fi
done

if [ -z "$PYTHON_CMD" ]; then
    echo "  Installing Python 3.12..."
    sudo apt install -y python3.12 python3.12-venv python3-pip -qq
    PYTHON_CMD="python3.12"
fi

# The install path above pulls in the venv package, but an interpreter that
# was already present may not have it: on Debian/Ubuntu the ensurepip module
# ships in the separate pythonX.Y-venv package. Install it so both paths end
# in the same state.
# NOTE(review): assumes a later step creates a virtualenv - confirm against setup-tts.sh.
if ! "$PYTHON_CMD" -c 'import ensurepip' &>/dev/null; then
    PYTHON_MM="$("$PYTHON_CMD" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"
    echo "  Installing python${PYTHON_MM}-venv..."
    sudo apt install -y "python${PYTHON_MM}-venv" -qq \
        || warn "Could not install python${PYTHON_MM}-venv"
fi

ok "$("$PYTHON_CMD" --version) at $(command -v "$PYTHON_CMD")"

# Build tools + ffmpeg
# Missing packages are collected in an array so each name reaches apt as its
# own argument without relying on word splitting.
PKGS_TO_INSTALL=()
for pkg in ffmpeg git curl build-essential cmake; do
    # `dpkg -s` also succeeds for removed-but-not-purged packages, so check
    # the status field for the fully installed state instead.
    if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "install ok installed"; then
        PKGS_TO_INSTALL+=("$pkg")
    fi
done

if [ "${#PKGS_TO_INSTALL[@]}" -gt 0 ]; then
    echo "  Installing: ${PKGS_TO_INSTALL[*]}"
    sudo apt install -y "${PKGS_TO_INSTALL[@]}" -qq
fi
ok "ffmpeg, git, curl, build-essential, cmake"

# -- 2. NVIDIA GPU Passthrough ---------------------------------
step "Step 2/7: NVIDIA GPU (CUDA passthrough)"

if command -v nvidia-smi &>/dev/null; then
    # Report the first GPU listed by nvidia-smi.
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)
    GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader 2>/dev/null | head -1)
    ok "GPU: $GPU_NAME ($GPU_MEM)"
else
    warn "nvidia-smi not found!"
    echo "  Possible fixes:"
    echo "    1. Update NVIDIA drivers on Windows to the latest version"
    echo "    2. Run in PowerShell (Admin): wsl --update"
    echo "    3. Do NOT install nvidia-driver-* inside WSL2"
    echo ""
    # BASH_SOURCE[0] is empty when this script is itself being read from
    # stdin (curl ... | bash). In that case stdin holds the script text, so
    # the answer has to come from the terminal instead.
    REPLY=""
    if [ -n "${BASH_SOURCE[0]:-}" ]; then
        read -p "  Continue without CUDA? (y/N) " -n 1 -r || REPLY=""
    elif { : </dev/tty; } 2>/dev/null; then
        read -p "  Continue without CUDA? (y/N) " -n 1 -r </dev/tty || REPLY=""
    else
        warn "No terminal available to confirm - aborting"
    fi
    echo
    # Anything other than a single y/Y (including no answer) aborts.
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
fi

# -- 3. Verify Ollama (Windows-side) ---------------------------
step "Step 3/7: Ollama Connectivity"

# Start from OLLAMA_HOST if set (trailing slash removed), else localhost.
OLLAMA_URL="${OLLAMA_HOST:-http://localhost:11434}"
OLLAMA_URL="${OLLAMA_URL%/}"
# WSL2: if localhost fails, try Windows host via default gateway
if ! curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
    if [ -r /proc/version ] && grep -qi microsoft /proc/version 2>/dev/null; then
        GW=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
        if [ -n "$GW" ] && curl -s --max-time 3 "http://${GW}:11434/api/tags" &>/dev/null; then
            OLLAMA_URL="http://${GW}:11434"
        fi
    fi
fi
# Exported so child processes started later in this script see the same URL.
export OLLAMA_HOST="$OLLAMA_URL"

if curl -s --max-time 3 "$OLLAMA_URL/api/tags" &>/dev/null; then
    # Count the entries of the "models" array; "?" if the response cannot be
    # parsed. Uses the interpreter found in step 1 when available, since a
    # plain `python3` may not exist, and bounds the request like the probes.
    MODEL_COUNT=$(curl -s --max-time 3 "$OLLAMA_URL/api/tags" | "${PYTHON_CMD:-python3}" -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
    ok "Ollama reachable at $OLLAMA_URL ($MODEL_COUNT models)"
else
    warn "Ollama not reachable at $OLLAMA_URL"
    echo "  Make sure Ollama is running on Windows with OLLAMA_HOST=0.0.0.0:11434"
    echo "  Open PowerShell and run: ollama serve"
    echo ""
    # BASH_SOURCE[0] is empty when this script is itself being read from
    # stdin (curl ... | bash). In that case stdin holds the script text, so
    # the answer has to come from the terminal instead.
    REPLY=""
    if [ -n "${BASH_SOURCE[0]:-}" ]; then
        read -p "  Continue anyway? (y/N) " -n 1 -r || REPLY=""
    elif { : </dev/tty; } 2>/dev/null; then
        read -p "  Continue anyway? (y/N) " -n 1 -r </dev/tty || REPLY=""
    else
        warn "No terminal available to confirm - aborting"
    fi
    echo
    # Anything other than a single y/Y (including no answer) aborts.
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
fi

# -- 4. Clone Repository ----------------------------------------
step "Step 4/7: Clone Repository"

DEFAULT_REPO_DIR="$HOME/code/mygh/learning_ai_common_plat"
# Performance: /mnt/* is slower for node_modules. Keep setup fast by default.
# Optional: set MIRROR_TO_WSL_LOCAL=1 to copy repo to $DEFAULT_REPO_DIR.
if [[ "$REPO_DIR" == /mnt/* ]]; then
    warn "Repo is on Windows mount: $REPO_DIR"
    echo "  This works, but dashboard/node_modules can be slower on /mnt/*."
    if [ -d "$DEFAULT_REPO_DIR/__LOCAL_LLMs" ]; then
        # A WSL-local checkout already exists: prefer it over the mount.
        REPO_DIR="$DEFAULT_REPO_DIR"
        LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
        ok "Using existing WSL-local repo at $REPO_DIR"
    elif [ "${MIRROR_TO_WSL_LOCAL:-0}" = "1" ] && [ -d "$REPO_DIR/.git" ]; then
        echo "  MIRROR_TO_WSL_LOCAL=1 -> copying repo to:"
        echo "    $DEFAULT_REPO_DIR"
        mkdir -p "$DEFAULT_REPO_DIR"
        # Copy the directory *contents* (trailing /.) so the result is the
        # same whether or not the target already existed. Copying the
        # directory itself would nest the repo one level deeper when the
        # target is already there.
        cp -a "$REPO_DIR/." "$DEFAULT_REPO_DIR/"
        REPO_DIR="$DEFAULT_REPO_DIR"
        LLMS_DIR="$REPO_DIR/__LOCAL_LLMs"
        ok "Copied repo to $REPO_DIR"
    else
        echo "  Continuing on current path for speed."
        echo "  Tip: to mirror once into WSL-local FS, re-run with:"
        echo "    MIRROR_TO_WSL_LOCAL=1 bash setup-wsl.sh"
    fi
fi

# Clone when the checkout is missing; otherwise reuse it as-is.
if [[ ! -d "$LLMS_DIR" ]]; then
    echo "  Cloning into $REPO_DIR..."
    mkdir -p "$(dirname "$REPO_DIR")"
    git clone "$REPO_URL" "$REPO_DIR"
    ok "Cloned"
else
    ok "Repo already cloned at $REPO_DIR"
    # A checkout at any other location is left untouched; only the default
    # clone path is fast-forwarded, and a failed pull is not fatal.
    if [[ "$REPO_DIR" == "$DEFAULT_REPO_DIR" ]]; then
        echo "  Pulling latest..."
        ( cd "$REPO_DIR" && git pull --ff-only origin main 2>/dev/null ) || true
    fi
fi

# Either branch must leave __LOCAL_LLMs in place; stop otherwise.
[[ -d "$LLMS_DIR" ]] || fail "__LOCAL_LLMs directory not found at $LLMS_DIR"
ok "__LOCAL_LLMs directory: $LLMS_DIR"

# -- 5. Build Whisper.cpp (CUDA) ---------------------------------
step "Step 5/7: Whisper.cpp (CUDA build)"

# An existing whisper-cli on PATH makes the whole build a no-op.
if command -v whisper-cli &>/dev/null; then
    ok "whisper-cli already installed: $(command -v whisper-cli)"
else
    if [ ! -d "$WHISPER_DIR" ]; then
        echo "  Cloning whisper.cpp..."
        git clone https://github.com/ggerganov/whisper.cpp.git "$WHISPER_DIR"
    fi

    echo "  Building with CUDA support (this may take a few minutes)..."
    cd "$WHISPER_DIR"

    # nvidia-smi being present is taken as "a GPU is available".
    if command -v nvidia-smi &>/dev/null; then
        # Compiling the CUDA backend needs nvcc. The distro toolkit is only
        # installed when no nvcc is on PATH yet, so a separately installed
        # (possibly newer) toolkit is left alone.
        if ! command -v nvcc &>/dev/null && ! dpkg -s nvidia-cuda-toolkit &>/dev/null; then
            echo "  Installing CUDA toolkit for compilation..."
            # Best-effort: if this fails, the cmake configure below fails
            # and reports the missing toolkit.
            sudo apt install -y nvidia-cuda-toolkit -qq 2>/dev/null || true
        fi
        # Architecture selection: an explicit CMAKE_CUDA_ARCHITECTURES wins.
        # Otherwise 120 (Blackwell / RTX 50) when nvcc is 12.8 or newer,
        # else 90. Note 90 is the Hopper compute capability; Ada (RTX 40) is
        # 89 and has to be requested through CMAKE_CUDA_ARCHITECTURES.
        if [ -n "${CMAKE_CUDA_ARCHITECTURES:-}" ]; then
            CUDA_ARCH="$CMAKE_CUDA_ARCHITECTURES"
            echo "  Using CUDA architectures: $CUDA_ARCH (from CMAKE_CUDA_ARCHITECTURES)"
        else
            NVCC_VER=""
            if command -v nvcc &>/dev/null; then
                # Extract "major.minor" from the "release X.Y" text.
                NVCC_VER=$(nvcc --version 2>/dev/null | sed -n 's/.*release \([0-9]*\)\.\([0-9]*\).*/\1.\2/p' | head -1)
            fi
            # Blackwell (sm_120) requires CUDA 12.8+. Version sort puts the
            # larger of the two last, so NVCC_VER >= 12.8 iff it sorts last.
            if [ -n "$NVCC_VER" ] && [ "$(printf '%s\n' "12.8" "$NVCC_VER" | sort -V | tail -1)" = "$NVCC_VER" ]; then
                CUDA_ARCH="120"
                echo "  Using CUDA architecture 120 (Blackwell / RTX 50) - nvcc $NVCC_VER"
            else
                CUDA_ARCH="90"
                echo "  Using CUDA architecture 90 (Hopper) - nvcc ${NVCC_VER:-unknown} (use CUDA 12.8+ for Blackwell 120, or set CMAKE_CUDA_ARCHITECTURES)"
            fi
        fi
        # Configure quietly first; on failure run it again with stderr
        # visible so the actual error is shown before the script stops.
        cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" 2>/dev/null || cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}"
    else
        warn "No CUDA - building CPU-only whisper.cpp"
        cmake -B build
    fi

    cmake --build build --config Release -j"$(nproc)"
    sudo cp build/bin/whisper-cli /usr/local/bin/
    ok "whisper-cli installed to /usr/local/bin/"
fi

# Download Whisper model
# An existing file of 100 MB or less is treated as a broken download and
# removed so it is fetched again.
if [ -f "$WHISPER_MODEL_FILE" ]; then
    SIZE=$(stat -c%s "$WHISPER_MODEL_FILE" 2>/dev/null || echo 0)
    if [ "$SIZE" -gt 100000000 ]; then
        # Shell integer arithmetic; avoids depending on `bc`, which this
        # script never installs.
        ok "Whisper model already downloaded ($(( SIZE / 1048576 )) MB)"
    else
        warn "Whisper model looks incomplete ($SIZE bytes). Re-downloading..."
        rm -f "$WHISPER_MODEL_FILE"
    fi
fi

if [ ! -f "$WHISPER_MODEL_FILE" ]; then
    mkdir -p "$WHISPER_MODEL_DIR"
    echo "  Downloading ggml-large-v3-turbo.bin (~1.5 GB)..."
    # Download to a .part file and rename only on success, so an interrupted
    # transfer can never be mistaken for a complete model on the next run.
    # -f makes curl fail on HTTP errors instead of saving the error page.
    WHISPER_MODEL_PART="$WHISPER_MODEL_FILE.part"
    if ! curl -fL --progress-bar -o "$WHISPER_MODEL_PART" \
        "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"; then
        rm -f "$WHISPER_MODEL_PART"
        fail "Whisper model download failed"
    fi
    mv -f "$WHISPER_MODEL_PART" "$WHISPER_MODEL_FILE"
    ok "Whisper model downloaded"
fi

# -- 6. TTS Setup ------------------------------------------------
step "Step 6/7: TTS Setup (Orpheus + Qwen3-TTS)"

cd "$LLMS_DIR"

# A checkout made on Windows can carry CRLF line endings; strip the trailing
# carriage returns from the helper scripts. Missing files are skipped and a
# failing sed is ignored.
for script in setup-tts.sh start-dashboard.sh download-tts-models.sh; do
    if [ -f "$script" ]; then
        sed -i 's/\r$//' "$script" 2>/dev/null || true
    fi
done

# HF_MIRROR is pointed at huggingface.co itself (personal machine, no
# corporate proxy) for the duration of setup-tts.sh only.
echo "  Running setup-tts.sh (this may take several minutes on first run)..."
HF_MIRROR=https://huggingface.co bash setup-tts.sh

ok "TTS setup complete"

# -- 7. Start Dashboard ------------------------------------------
step "Step 7/7: Mission Control Dashboard"

cd "$LLMS_DIR"

# Dependencies are installed only when node_modules is absent. The subshell
# keeps the directory change local to the install.
if [[ ! -d "$LLMS_DIR/dashboard/node_modules" ]]; then
    echo "  Installing dashboard dependencies..."
    (
        cd "$LLMS_DIR/dashboard"
        npm install --silent
    )
    ok "Dashboard dependencies installed"
fi

# Start the dashboard unless something already answers on port 3000.
if ! curl -s --max-time 2 http://localhost:3000 &>/dev/null; then
    echo "  Starting dashboard..."
    bash start-dashboard.sh
else
    ok "Dashboard already running at http://localhost:3000"
fi

# -- Summary -----------------------------------------------------
# Final report: what was installed, where the services are, and how to
# drive the dashboard and TTS tests afterwards.
echo ""
echo -e "  ${GREEN}=====================================${NC}"
echo -e "  ${GREEN}WSL2 Setup Complete!${NC}"
echo -e "  ${GREEN}=====================================${NC}"
echo ""
echo "  Components installed:"
echo "    Node.js:       $(node --version)"
echo "    Python:        $($PYTHON_CMD --version 2>&1)"
if command -v nvidia-smi &>/dev/null; then
echo "    GPU:           $(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1)"
fi
echo "    Whisper:       $(command -v whisper-cli 2>/dev/null || echo 'not found')"
echo "    Repo:          $LLMS_DIR"
echo ""
echo "  Dashboard:       http://localhost:3000"
# Report the URL that the connectivity check settled on (it may be the
# Windows gateway address rather than localhost).
echo "  Ollama API:      ${OLLAMA_URL:-http://localhost:11434}"
echo ""
# The commands below use relative paths, and this script's `cd` does not
# carry over to the caller's shell, so name the directory to run them from.
echo "  Useful commands (run from $LLMS_DIR):"
echo "    bash start-dashboard.sh          # start dashboard"
echo "    bash start-dashboard.sh status   # check status"
echo "    bash start-dashboard.sh stop     # stop dashboard"
echo ""
echo "  Test TTS:"
echo "    .venv-qwen-tts/bin/python test_orpheus_tts.py"
echo "    .venv-qwen-tts/bin/python test_qwen_tts.py"
echo ""