learning_ai_common_plat/__LOCAL_LLMs/download-tts-models.sh
2026-02-21 14:13:07 -08:00

175 lines
6.3 KiB
Bash
Executable File

#!/bin/bash
# ============================================================
# Download TTS Model Weights
#
# Downloads SNAC decoder + Qwen3-TTS from HuggingFace.
# Uses hf-mirror.com which works through corporate proxy.
# Falls back to huggingface.co if mirror is unreachable.
#
# No Python venv required — downloads use curl; the mirror probe also
# uses the system python3 to validate the JSON it receives.
#
# Usage:
# bash download-tts-models.sh # download all
# bash download-tts-models.sh snac # SNAC decoder only
# bash download-tts-models.sh qwen # Qwen3-TTS only
# ============================================================
# Abort as soon as any unguarded command exits non-zero.
set -o errexit

# Directory containing this script, resolved to an absolute path; model
# weights are stored in its "models" subdirectory.
SCRIPT_DIR="$(dirname "$0")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
MODELS_DIR="${SCRIPT_DIR}/models"
# ANSI colour escapes used to tint the status markers printed below.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Print a success line on stdout: a green check mark, then the message.
# Arguments: $1 - message text (printed verbatim, backslashes untouched)
ok() {
  # %b expands the colour escapes; %s keeps the message literal.
  printf '%b✓%b %s\n' "$GREEN" "$NC" "${1-}"
}

# Print a failure line on stderr (red cross, then the message) and abort
# the script with exit status 1.
# Arguments: $1 - message text (printed verbatim, backslashes untouched)
fail() {
  printf '%b✗%b %s\n' "$RED" "$NC" "${1-}" >&2
  exit 1
}
echo "=== TTS Model Downloader ==="
echo ""

# ── Pick HuggingFace source ─────────────────────────────────
# Try hf-mirror.com first (works through corporate proxy)
# Fall back to huggingface.co (requires non-corporate network)
#
# Both probes pass -f (--fail) so that an HTTP error status counts as
# "unreachable" instead of being mistaken for a working endpoint.
# NOTE(review): -k turns off TLS certificate verification for the mirror
# probe; presumably needed behind a TLS-intercepting proxy — confirm.
HF_BASE=""
echo "Testing hf-mirror.com..."
# The mirror must answer with parseable JSON. python3 is used only as a JSON
# validator here; if python3 is missing the mirror is treated as unavailable.
if curl -k -f -s --max-time 5 \
    "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json" \
  | python3 -c "import sys,json; json.load(sys.stdin)" &>/dev/null; then
  HF_BASE="https://hf-mirror.com"
  ok "Using hf-mirror.com (works through corporate proxy)"
else
  echo "Mirror unavailable. Testing huggingface.co..."
  if curl -f -s --max-time 5 -o /dev/null \
      "https://huggingface.co/api/models/hubertsiuzdak/snac_24khz" 2>/dev/null; then
    HF_BASE="https://huggingface.co"
    ok "Using huggingface.co directly"
  else
    fail "Cannot reach hf-mirror.com or huggingface.co. If on corporate network, try from home WiFi."
  fi
fi
echo ""

# Make sure the download target directory exists before any download runs.
mkdir -p "$MODELS_DIR"
# ── Helper: download with validation ────────────────────────
# Download one URL to a destination path and reject unusable results.
#
# The body is written to "<dest>.part" and only renamed to <dest> after it
# passes every check, so an interrupted or rejected transfer never leaves a
# partial file at <dest>.
#
# Arguments:
#   $1 - source URL
#   $2 - destination file path
#   $3 - human-readable description used in progress/error messages
# Outputs:  progress line on stdout; curl's progress bar
# Returns:  0 on success; calls fail (exit 1) when curl fails, the body is
#           empty, or the body starts with an HTML page
download_file() {
  local url="$1"
  local dest="$2"
  local desc="$3"
  local part="${dest}.part"

  echo " Downloading $desc..."

  # -f makes curl return non-zero on an HTTP error status instead of saving
  # the error body as if it were the requested file.
  # NOTE(review): -k disables TLS certificate verification, so the downloaded
  # files are not authenticated — confirm this is acceptable.
  if ! curl -k -f -L --progress-bar -o "$part" "$url"; then
    rm -f -- "$part"
    fail "Download of $desc failed (network error or HTTP error status)."
  fi

  if [[ ! -s "$part" ]]; then
    rm -f -- "$part"
    fail "Downloaded $desc is empty."
  fi

  # Verify not an HTML block page. The first bytes are piped straight into
  # grep (not captured in a variable) because binary weights contain NUL bytes.
  if head -c 50 "$part" 2>/dev/null | LC_ALL=C grep -aEqi '<!DOCTYPE|<html'; then
    rm -f -- "$part"
    fail "Downloaded $desc is HTML (proxy block page). Try from non-corporate network."
  fi

  mv -f -- "$part" "$dest"
}
# ── 1. SNAC 24kHz decoder ───────────────────────────────────
# Fetch config.json and pytorch_model.bin for hubertsiuzdak/snac_24khz into
# "$MODELS_DIR/snac_24khz", unless the weights file is already present and
# larger than 1,000,000 bytes.
#
# Globals:  MODELS_DIR (read), HF_BASE (read)
# Outputs:  status lines on stdout
# Returns:  0 on success; download_file aborts the script on failure
download_snac() {
  local dir="$MODELS_DIR/snac_24khz"
  local weights="$dir/pytorch_model.bin"
  local repo_url="$HF_BASE/hubertsiuzdak/snac_24khz"
  local size

  echo "=== [SNAC] 24kHz Audio Decoder (~76 MB) ==="
  mkdir -p "$dir"

  if [[ -f "$weights" ]]; then
    # BSD stat first, then GNU stat; fall back to 0 if neither works.
    size=$(stat -f%z "$weights" 2>/dev/null || stat -c%s "$weights" 2>/dev/null || echo 0)
    if (( size > 1000000 )); then
      # Size in MiB with one (truncated) decimal, using shell arithmetic only.
      ok "Already downloaded ($(( size / 1048576 )).$(( size % 1048576 * 10 / 1048576 )) MB)"
      echo ""
      return
    fi
  fi

  download_file "$repo_url/raw/main/config.json" \
    "$dir/config.json" "config.json"
  download_file "$repo_url/resolve/main/pytorch_model.bin" \
    "$weights" "pytorch_model.bin (~76 MB)"
  ok "SNAC decoder downloaded"
  echo ""
}
# ── 2. Qwen3-TTS Tokenizer ──────────────────────────────────
# Fetch the three JSON config files and model.safetensors for
# Qwen/Qwen3-TTS-Tokenizer-12Hz into "$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz",
# unless model.safetensors is already present and larger than
# 100,000,000 bytes.
#
# Globals:  MODELS_DIR (read), HF_BASE (read)
# Outputs:  status lines on stdout
# Returns:  0 on success; download_file aborts the script on failure
download_qwen_tokenizer() {
  local dir="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
  local weights="$dir/model.safetensors"
  local base_url="$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main"
  local size name

  echo "=== [Qwen3-TTS] Tokenizer (~650 MB) ==="
  mkdir -p "$dir"

  if [[ -f "$weights" ]]; then
    # BSD stat first, then GNU stat; fall back to 0 if neither works.
    size=$(stat -f%z "$weights" 2>/dev/null || stat -c%s "$weights" 2>/dev/null || echo 0)
    if (( size > 100000000 )); then
      # Size in MiB with one (truncated) decimal, using shell arithmetic only.
      ok "Already downloaded ($(( size / 1048576 )).$(( size % 1048576 * 10 / 1048576 )) MB)"
      echo ""
      return
    fi
  fi

  for name in config.json configuration.json preprocessor_config.json; do
    download_file "$base_url/$name" "$dir/$name" "$name"
  done
  download_file "$base_url/model.safetensors" \
    "$weights" "model.safetensors (~650 MB)"
  ok "Qwen3-TTS Tokenizer downloaded"
  echo ""
}
# ── 3. Qwen3-TTS 0.6B model ─────────────────────────────────
# Fetch config.json, generation_config.json and model.safetensors for
# Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice into
# "$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice", unless model.safetensors is
# already present and larger than 100,000,000 bytes.
#
# Globals:  MODELS_DIR (read), HF_BASE (read)
# Outputs:  status lines on stdout
# Returns:  0 on success; download_file aborts the script on failure
download_qwen_model() {
  local dir="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"
  local weights="$dir/model.safetensors"
  local base_url="$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main"
  local size name

  echo "=== [Qwen3-TTS] 0.6B CustomVoice (~1.2 GB) ==="
  mkdir -p "$dir"

  if [[ -f "$weights" ]]; then
    # BSD stat first, then GNU stat; fall back to 0 if neither works.
    size=$(stat -f%z "$weights" 2>/dev/null || stat -c%s "$weights" 2>/dev/null || echo 0)
    if (( size > 100000000 )); then
      # Size in MiB with one (truncated) decimal, using shell arithmetic only.
      ok "Already downloaded ($(( size / 1048576 )).$(( size % 1048576 * 10 / 1048576 )) MB)"
      echo ""
      return
    fi
  fi

  for name in config.json generation_config.json; do
    download_file "$base_url/$name" "$dir/$name" "$name"
  done
  download_file "$base_url/model.safetensors" \
    "$weights" "model.safetensors (~1.2 GB)"
  ok "Qwen3-TTS 0.6B downloaded"
  echo ""
}
# ── Run downloads ────────────────────────────────────────────
# The first command-line argument selects what to fetch; when it is absent
# or empty, everything is fetched. Any other value prints usage and exits 1.
selection="${1:-all}"
if [[ "$selection" == "snac" ]]; then
  download_snac
elif [[ "$selection" == "qwen" ]]; then
  download_qwen_tokenizer
  download_qwen_model
elif [[ "$selection" == "all" ]]; then
  download_snac
  download_qwen_tokenizer
  download_qwen_model
else
  echo "Usage: bash download-tts-models.sh [snac|qwen|all]"
  exit 1
fi
# ── Summary ──────────────────────────────────────────────────
# Announce completion, list the disk usage of each entry directly under the
# models directory (each line prefixed with one space), then print the
# suggested follow-up test commands.
printf '%s\n' "=== Downloads complete ===" "" "Disk usage:"
du -sh "$MODELS_DIR"/* 2>/dev/null \
  | while IFS= read -r usage_line || [[ -n "$usage_line" ]]; do
      printf ' %s\n' "$usage_line"
    done
printf '%s\n' \
  "" \
  "Test commands:" \
  " .venv-qwen-tts/bin/python test_orpheus_tts.py # Orpheus via Ollama" \
  " .venv-qwen-tts/bin/python test_qwen_tts.py # Qwen3-TTS direct"