#!/bin/bash
# ============================================================
# TTS Setup — One-Shot Script for Fresh Laptop
#
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
# on Apple Silicon Macs. Works through corporate proxy.
#
# What this does:
#   1. Installs Python 3.12 via Homebrew (if missing)
#   2. Creates Python venv with TTS packages
#   3. Pulls Orpheus TTS model via Ollama
#   4. Downloads SNAC audio decoder via hf-mirror.com
#   5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
#
# Prerequisites:
#   - macOS with Apple Silicon (M1/M2/M3/M4)
#   - Homebrew installed
#   - Ollama installed (brew install ollama)
#
# Usage:
#   bash setup-tts.sh
#
# Environment:
#   TTS_VERIFY_TLS=1   Verify TLS certificates on mirror downloads.
#                      Default is 0 (curl --insecure), matching the
#                      script's historical behaviour behind a proxy.
#
# After setup, test with:
#   .venv-qwen-tts/bin/python test_orpheus_tts.py
#   afplay test_orpheus_tara.wav
#
# Compatibility: written for Bash 3.2 (the /bin/bash shipped with macOS).
# ============================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly SCRIPT_DIR
readonly VENV="$SCRIPT_DIR/.venv-qwen-tts"
readonly MODELS_DIR="$SCRIPT_DIR/models"

# HuggingFace mirror that works through corporate proxy
readonly HF_MIRROR="https://hf-mirror.com"

# SECURITY NOTE: certificate verification is OFF by default because the
# original script always passed `curl -k`. Set TTS_VERIFY_TLS=1 to enable it.
TTS_VERIFY_TLS="${TTS_VERIFY_TLS:-0}"

# Anything smaller than this is treated as a truncated/corrupt model file.
readonly MIN_MODEL_BYTES=1000000

readonly OLLAMA_URL="http://localhost:11434/api/tags"

readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# ── Output helpers ──────────────────────────────────────────
# Colours go through %b, the message through %s, so backslashes or
# percent signs inside a message are printed literally.
ok()   { printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"; }
warn() { printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1" >&2; }
fail() { printf '%b✗%b %s\n' "$RED" "$NC" "$1" >&2; exit 1; }
step() { printf '\n%b=== %s ===%b\n' "$GREEN" "$1" "$NC"; }

# ── Generic helpers ─────────────────────────────────────────

# ollama_up — succeeds when the local Ollama HTTP API answers.
ollama_up() {
  curl -s --max-time 2 "$OLLAMA_URL" &>/dev/null
}

# file_size PATH — print the size of PATH in bytes (0 if it cannot be read).
file_size() {
  local bytes
  bytes=$(wc -c <"$1" 2>/dev/null) || bytes=0
  # Arithmetic expansion strips the padding BSD wc puts around the number.
  printf '%s\n' "$(( bytes ))"
}

# is_html PATH — succeeds when `file` identifies PATH as HTML, i.e. a proxy
# block page was saved instead of the requested artifact.
is_html() {
  local desc
  desc=$(file -b "$1" 2>/dev/null) || return 1
  grep -qi "html" <<<"${desc:0:20}"
}

# fetch MODE URL DEST — download URL to DEST.
#   MODE: "progress" shows a progress bar, anything else is silent.
# The transfer goes to DEST.part and is renamed only on success, so an
# interrupted run never leaves a plausible-looking file at DEST. --fail makes
# HTTP 4xx/5xx an error instead of saving the error page as the artifact.
# Returns non-zero on any network or HTTP error.
fetch() {
  local mode=$1 url=$2 dest=$3
  local tmp="${dest}.part"
  local -a opts=(--fail --location)

  if [[ "$TTS_VERIFY_TLS" != "1" ]]; then
    opts+=(--insecure)
  fi
  if [[ "$mode" == "progress" ]]; then
    opts+=(--progress-bar)
  else
    opts+=(--silent)
  fi

  if curl "${opts[@]}" -o "$tmp" "$url"; then
    mv -f -- "$tmp" "$dest"
  else
    rm -f -- "$tmp"
    return 1
  fi
}

# fetch_binary URL DEST — fetch a large binary with a progress bar.
# Returns 1 when the transfer failed, 2 when the payload turned out to be HTML
# (the bogus file is removed in that case).
fetch_binary() {
  fetch progress "$1" "$2" || return 1
  if is_html "$2"; then
    rm -f -- "$2"
    return 2
  fi
  return 0
}

# verify_snac DIR — load the SNAC decoder from DIR with the venv interpreter
# and print its parameter count. DIR is passed as an argument rather than
# spliced into the Python source, so any path characters are safe.
verify_snac() {
  "$VENV/bin/python" - "$1" <<'PY'
import sys
import snac, torch
model = snac.SNAC.from_pretrained(sys.argv[1])
print(f'SNAC: {sum(p.numel() for p in model.parameters())/1e6:.1f}M parameters')
PY
}

echo "╔══════════════════════════════════════════════╗"
echo "║ TTS Setup — Local Speech Generation ║"
echo "║ Orpheus TTS (Ollama) + Qwen3-TTS (Python) ║"
echo "╚══════════════════════════════════════════════╝"
echo ""

# ── 0. Check prerequisites ──────────────────────────────────
step "Checking prerequisites"

# Homebrew
if ! command -v brew &>/dev/null; then
  fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
fi
ok "Homebrew"

# Ollama
if ! command -v ollama &>/dev/null; then
  warn "Ollama not found. Installing..."
  brew install ollama
fi
ok "Ollama installed"

# Check if Ollama is running; if not, start it and poll until it answers
# instead of relying on a single fixed sleep.
if ! ollama_up; then
  warn "Ollama not running. Starting..."
  ollama serve &>/dev/null &
  for (( attempt = 0; attempt < 10; attempt++ )); do
    if ollama_up; then
      break
    fi
    sleep 1
  done
  if ! ollama_up; then
    fail "Could not start Ollama. Try manually: ollama serve"
  fi
fi
ok "Ollama running on port 11434"

# Apple Silicon check
ARCH=$(uname -m)
if [[ "$ARCH" != "arm64" ]]; then
  warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
fi

# ── 1. Install Python 3.12 ──────────────────────────────────
step "Python 3.12"

PYTHON_CMD=""
# Check various Python 3.12 locations
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
  if command -v "$cmd" &>/dev/null; then
    PYTHON_CMD="$cmd"
    break
  fi
done

if [[ -z "$PYTHON_CMD" ]]; then
  warn "Python 3.12 not found. Installing via Homebrew..."
  brew install python@3.12
  # Ask Homebrew for its prefix: /opt/homebrew on Apple Silicon but
  # /usr/local on Intel, which this script tolerates with a warning.
  PYTHON_CMD="$(brew --prefix)/bin/python3.12"
  if [[ ! -x "$PYTHON_CMD" ]]; then
    fail "Python 3.12 was installed but not found at $PYTHON_CMD"
  fi
fi

PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
ok "$PYTHON_VER at $PYTHON_CMD"

# ── 2. Create venv ──────────────────────────────────────────
step "Python virtual environment"

if [[ -f "$VENV/bin/python" ]]; then
  ok "Venv exists at $VENV"
else
  echo "Creating venv..."
  "$PYTHON_CMD" -m venv "$VENV"
  ok "Venv created at $VENV"
fi

# ── 3. Install Python packages ──────────────────────────────
step "Python packages"

# Check if snac is installed (quick proxy for all packages)
if "$VENV/bin/python" -c "import snac" &>/dev/null; then
  ok "Packages already installed (snac, torch, etc.)"
else
  echo "Installing packages (this may take a few minutes)..."
  "$VENV/bin/pip" install -U pip --quiet
  "$VENV/bin/pip" install -U snac qwen-tts --quiet
  ok "Packages installed"
fi

# ── 4. Pull Orpheus TTS model ───────────────────────────────
step "Orpheus TTS model (Ollama)"

# Capture the listing first: piping straight into `grep -q` can make the
# producer die of SIGPIPE, which pipefail would report as "not installed".
installed_models=$(ollama list 2>/dev/null || true)
if grep -q "orpheus" <<<"$installed_models"; then
  ok "Orpheus TTS already downloaded"
else
  echo "Pulling sematre/orpheus:en (4 GB)..."
  NO_PROXY="ollama.com,registry.ollama.ai" ollama pull sematre/orpheus:en
  ok "Orpheus TTS downloaded"
fi

# ── 5. Download SNAC decoder ────────────────────────────────
step "SNAC 24kHz audio decoder (~76 MB)"

readonly SNAC_DIR="$MODELS_DIR/snac_24khz"
readonly SNAC_WEIGHTS="$SNAC_DIR/pytorch_model.bin"
mkdir -p "$SNAC_DIR"

if [[ -f "$SNAC_WEIGHTS" ]]; then
  SIZE=$(file_size "$SNAC_WEIGHTS")
  if (( SIZE > MIN_MODEL_BYTES )); then
    ok "SNAC decoder already downloaded ($(awk -v b="$SIZE" 'BEGIN { printf "%.1f", b / 1048576 }') MB)"
  else
    warn "SNAC file looks corrupted (${SIZE} bytes). Re-downloading..."
    rm -f -- "$SNAC_WEIGHTS"
  fi
fi

if [[ ! -f "$SNAC_WEIGHTS" ]]; then
  echo "Downloading config.json..."
  if ! fetch quiet "$HF_MIRROR/hubertsiuzdak/snac_24khz/raw/main/config.json" \
      "$SNAC_DIR/config.json"; then
    fail "Could not download config.json. The mirror may be blocked. Try from home network."
  fi

  # Verify config is JSON (not an HTML block page). Uses the venv interpreter
  # (a bare `python3` may be missing on a fresh Mac) and passes the path via
  # argv so it is never interpreted as Python source.
  if ! "$VENV/bin/python" -c 'import json, sys; json.load(open(sys.argv[1]))' \
      "$SNAC_DIR/config.json" &>/dev/null; then
    fail "Downloaded config.json is not valid JSON. The mirror may be blocked. Try from home network."
  fi
  ok "config.json downloaded"

  echo "Downloading pytorch_model.bin (~76 MB)..."
  rc=0
  fetch_binary "$HF_MIRROR/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin" \
    "$SNAC_WEIGHTS" || rc=$?
  case "$rc" in
    0) ;;
    2) fail "Downloaded model is HTML (proxy block page). Try from home network." ;;
    *) fail "Download of pytorch_model.bin failed. Try from home network." ;;
  esac
  ok "SNAC decoder downloaded"
fi

# Verify SNAC loads in Python (stderr is discarded to hide library warnings)
echo "Verifying SNAC decoder loads..."
if verify_snac "$SNAC_DIR" 2>/dev/null; then
  ok "SNAC decoder verified"
else
  fail "SNAC decoder failed to load. Delete models/snac_24khz/ and re-run."
fi

# ── 6. (Optional) Download Qwen3-TTS ────────────────────────
step "Qwen3-TTS 0.6B (optional, ~1.7 GB total)"

readonly QWEN_TOKENIZER_DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
readonly QWEN_MODEL_DIR="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"

# qwen_installed — succeeds only when the model config and BOTH weight files
# are present and non-trivially sized. Checking config.json alone is not
# enough: it is fetched before the weights, so it also exists after an
# aborted download.
qwen_installed() {
  local weights
  [[ -f "$QWEN_MODEL_DIR/config.json" ]] || return 1
  for weights in \
    "$QWEN_TOKENIZER_DIR/model.safetensors" \
    "$QWEN_MODEL_DIR/model.safetensors"; do
    [[ -f "$weights" ]] || return 1
    (( $(file_size "$weights") > MIN_MODEL_BYTES )) || return 1
  done
  return 0
}

# download_qwen — fetch the Qwen3-TTS tokenizer and 0.6B model from the mirror.
# Auxiliary metadata files are best-effort (a missing one only warns); the
# model's config.json and both model.safetensors files are required.
# Returns non-zero if a required file could not be downloaded.
download_qwen() {
  local f
  local tok_url="$HF_MIRROR/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main"
  local model_url="$HF_MIRROR/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main"

  # Tokenizer (~650 MB)
  echo "Downloading Qwen3-TTS Tokenizer (~650 MB)..."
  mkdir -p "$QWEN_TOKENIZER_DIR" || return 1
  for f in config.json configuration.json preprocessor_config.json; do
    fetch quiet "$tok_url/$f" "$QWEN_TOKENIZER_DIR/$f" \
      || warn "Could not download tokenizer file $f (continuing)"
  done
  fetch_binary "$tok_url/model.safetensors" \
    "$QWEN_TOKENIZER_DIR/model.safetensors" || return 1
  ok "Tokenizer downloaded"

  # Model
  echo "Downloading Qwen3-TTS 0.6B (~1.2 GB)..."
  mkdir -p "$QWEN_MODEL_DIR" || return 1
  # config.json gates the "installed" check, so fail before the big transfer.
  fetch quiet "$model_url/config.json" "$QWEN_MODEL_DIR/config.json" || return 1
  fetch quiet "$model_url/generation_config.json" \
    "$QWEN_MODEL_DIR/generation_config.json" \
    || warn "Could not download model file generation_config.json (continuing)"
  fetch_binary "$model_url/model.safetensors" \
    "$QWEN_MODEL_DIR/model.safetensors" || return 1
  ok "Qwen3-TTS 0.6B downloaded"
}

if qwen_installed; then
  ok "Qwen3-TTS already downloaded"
else
  echo "Qwen3-TTS 0.6B requires ~1.7 GB download (tokenizer + model)."
  echo "This is optional — Orpheus TTS (above) works without it."
  # read fails at EOF (e.g. stdin is not a terminal); treat that as "no"
  # instead of letting `set -e` abort the script before the summary.
  REPLY=""
  read -p "Download Qwen3-TTS? [y/N] " -n 1 -r || REPLY=""
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    if ! download_qwen; then
      warn "Qwen3-TTS download failed. Orpheus TTS is unaffected; re-run this script to retry."
    fi
  else
    warn "Skipped. You can re-run this script later to download."
  fi
fi

# ── Summary ──────────────────────────────────────────────────
step "Setup Complete"

# Column 1 of `ollama list` is the model name; fall back to 'ready' when the
# listing is unavailable or has no matching row.
installed_models=$(ollama list 2>/dev/null || true)
orpheus_name=$(awk '/orpheus/ { print $1; exit }' <<<"$installed_models")

echo ""
echo "Installed components:"
echo " Orpheus TTS (Ollama): ${orpheus_name:-ready}"
echo " SNAC decoder: $MODELS_DIR/snac_24khz/"
if qwen_installed; then
  echo " Qwen3-TTS 0.6B: $QWEN_MODEL_DIR/"
else
  echo " Qwen3-TTS 0.6B: (not installed — re-run setup to add)"
fi

echo ""
echo "Disk usage:"
# Purely informational; never let a du/sed hiccup fail the run.
du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /' || true

echo ""
echo "Test commands:"
echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
echo " afplay test_orpheus_tara.wav"
if qwen_installed; then
  echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
fi

echo ""
echo "Voices: tara, leah, jess, leo, dan, mia, zac, zoe"
# NOTE(review): the tag names in the list below appear to have been lost
# (possibly angle-bracketed tokens stripped by some tool) — restore them from
# the original script; left unchanged here rather than guessed.
echo "Emotion: , , , , , , "