#!/bin/bash
# ============================================================
# Download TTS Model Weights
#
# Downloads the SNAC decoder and Qwen3-TTS from HuggingFace.
# Uses hf-mirror.com, which works through the corporate proxy.
# Falls back to huggingface.co if the mirror is unreachable.
#
# No Python venv required — files are fetched with curl.
#
# Usage:
#   bash download-tts-models.sh        # download all
#   bash download-tts-models.sh snac   # SNAC decoder only
#   bash download-tts-models.sh qwen   # Qwen3-TTS only
# ============================================================

# Abort on the first unhandled command failure.
set -e

# Directory containing this script; models are stored in a "models" folder
# next to it, independent of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
MODELS_DIR="$SCRIPT_DIR/models"

# ANSI colour escape sequences, kept in their backslash form and expanded by
# printf's %b conversion in the helpers below.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# ok MESSAGE
#   Print a green check mark followed by MESSAGE on stdout.
#   MESSAGE is printed verbatim (%s), so backslashes in it are not interpreted.
ok() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# fail MESSAGE
#   Print a red cross followed by MESSAGE on stderr, then exit the script
#   with status 1.
fail() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1" >&2
  exit 1
}

echo "=== TTS Model Downloader ==="
echo ""

# ── Pick HuggingFace source ─────────────────────────────────
# Try hf-mirror.com first (works through corporate proxy).
# Fall back to huggingface.co (requires non-corporate network).
# On success HF_BASE holds the chosen base URL; otherwise the script aborts
# through fail.
HF_BASE=""
echo "Testing hf-mirror.com..."

# Fetch a small, known JSON file from the mirror. --fail (-f) turns HTTP error
# statuses into an empty reply; "|| true" keeps set -e from aborting here so
# the fallback below can run.
# NOTE(review): -k disables TLS certificate verification for the mirror probe,
# as in the original — presumably for TLS-intercepting proxies; confirm.
mirror_reply="$(curl -k -f -s --max-time 5 \
  "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json" \
  2>/dev/null || true)"

# Accept the mirror only if the reply looks like a JSON object ("{ ... }").
# An HTML block page or an empty/truncated reply does not match. This check
# is done in the shell so the probe has no dependency on python3.
json_object_re='^[[:space:]]*[{].*[}][[:space:]]*$'

if [[ "$mirror_reply" =~ $json_object_re ]]; then
  HF_BASE="https://hf-mirror.com"
  ok "Using hf-mirror.com (works through corporate proxy)"
else
  echo "Mirror unavailable. Testing huggingface.co..."
  # -f: an HTTP error (e.g. a proxy 403 page) must not count as "reachable".
  if curl -f -s --max-time 5 \
    "https://huggingface.co/api/models/hubertsiuzdak/snac_24khz" \
    -o /dev/null 2>/dev/null; then
    HF_BASE="https://huggingface.co"
    ok "Using huggingface.co directly"
  else
    fail "Cannot reach hf-mirror.com or huggingface.co. If on corporate network, try from home WiFi."
  fi
fi
echo ""

mkdir -p "$MODELS_DIR"

# ── Helper: download with validation ────────────────────────
# download_file URL DEST DESC
#   Download URL to DEST, announcing the transfer as DESC.
#
#   The transfer is staged in "DEST.part" and only renamed to DEST after it
#   has completed and passed validation, so an interrupted or rejected
#   download never leaves a truncated file at DEST.
#
#   Aborts the script through fail when:
#     - curl reports an error (including HTTP error statuses, via --fail), or
#     - the first 50 bytes of the payload look like an HTML page.
download_file() {
  local url="$1"
  local dest="$2"
  local desc="$3"
  local partial="${dest}.part"

  echo " Downloading $desc..."

  # NOTE(review): -k disables TLS certificate verification, as in the
  # original — presumably for TLS-intercepting proxies; confirm.
  # --fail: HTTP 4xx/5xx become a non-zero exit instead of a saved error body.
  if ! curl -k -L --fail --progress-bar -o "$partial" "$url"; then
    rm -f -- "$partial"
    fail "Download of $desc failed ($url)."
  fi

  # Verify not an HTML block page. The bytes are piped straight into grep
  # (-a: treat binary as text) rather than stored in a variable, because
  # model files contain NUL bytes that a command substitution would drop
  # with a warning.
  if head -c 50 "$partial" 2>/dev/null | LC_ALL=C grep -qaiE '<!DOCTYPE|<html'; then
    rm -f -- "$partial"
    fail "Downloaded $desc is HTML (proxy block page). Try from non-corporate network."
  fi

  mv -f -- "$partial" "$dest"
}

# ── 1. SNAC 24kHz decoder ───────────────────────────────────
# download_snac
#   Download config.json and pytorch_model.bin of hubertsiuzdak/snac_24khz
#   from $HF_BASE into $MODELS_DIR/snac_24khz using download_file.
#   Does nothing (beyond reporting) when pytorch_model.bin already exists and
#   is larger than 1,000,000 bytes.
download_snac() {
  local dir="$MODELS_DIR/snac_24khz"
  local weights="$dir/pytorch_model.bin"
  local size=0

  echo "=== [SNAC] 24kHz Audio Decoder (~76 MB) ==="
  mkdir -p "$dir"

  if [ -f "$weights" ]; then
    # wc -c reports the byte count on both GNU and BSD systems; the
    # arithmetic expansion strips any padding and maps an empty result to 0.
    size="$(wc -c <"$weights" 2>/dev/null)" || size=0
    size=$((size))
    if [ "$size" -gt 1000000 ]; then
      # Size in MiB with one (truncated) decimal, computed with shell
      # integer arithmetic so no external calculator is required.
      ok "Already downloaded ($((size / 1048576)).$((size % 1048576 * 10 / 1048576)) MB)"
      echo ""
      return 0
    fi
  fi

  download_file "$HF_BASE/hubertsiuzdak/snac_24khz/raw/main/config.json" \
    "$dir/config.json" "config.json"

  download_file "$HF_BASE/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin" \
    "$weights" "pytorch_model.bin (~76 MB)"

  ok "SNAC decoder downloaded"
  echo ""
}

# ── 2. Qwen3-TTS Tokenizer ──────────────────────────────────
# download_qwen_tokenizer
#   Download the JSON config files and model.safetensors of
#   Qwen/Qwen3-TTS-Tokenizer-12Hz from $HF_BASE into
#   $MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz using download_file.
#   Does nothing (beyond reporting) when model.safetensors already exists and
#   is larger than 100,000,000 bytes.
download_qwen_tokenizer() {
  local repo_url="$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main"
  local dir="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
  local weights="$dir/model.safetensors"
  local size=0
  local f

  echo "=== [Qwen3-TTS] Tokenizer (~650 MB) ==="
  mkdir -p "$dir"

  if [ -f "$weights" ]; then
    # wc -c reports the byte count on both GNU and BSD systems; the
    # arithmetic expansion strips any padding and maps an empty result to 0.
    size="$(wc -c <"$weights" 2>/dev/null)" || size=0
    size=$((size))
    if [ "$size" -gt 100000000 ]; then
      # Size in MiB with one (truncated) decimal, computed with shell
      # integer arithmetic so no external calculator is required.
      ok "Already downloaded ($((size / 1048576)).$((size % 1048576 * 10 / 1048576)) MB)"
      echo ""
      return 0
    fi
  fi

  for f in config.json configuration.json preprocessor_config.json; do
    download_file "$repo_url/$f" "$dir/$f" "$f"
  done

  download_file "$repo_url/model.safetensors" \
    "$weights" "model.safetensors (~650 MB)"

  ok "Qwen3-TTS Tokenizer downloaded"
  echo ""
}

# ── 3. Qwen3-TTS 0.6B model ─────────────────────────────────
# download_qwen_model
#   Download the JSON config files and model.safetensors of
#   Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice from $HF_BASE into
#   $MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice using download_file.
#   Does nothing (beyond reporting) when model.safetensors already exists and
#   is larger than 100,000,000 bytes.
download_qwen_model() {
  local repo_url="$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main"
  local dir="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"
  local weights="$dir/model.safetensors"
  local size=0
  local f

  echo "=== [Qwen3-TTS] 0.6B CustomVoice (~1.2 GB) ==="
  mkdir -p "$dir"

  if [ -f "$weights" ]; then
    # wc -c reports the byte count on both GNU and BSD systems; the
    # arithmetic expansion strips any padding and maps an empty result to 0.
    size="$(wc -c <"$weights" 2>/dev/null)" || size=0
    size=$((size))
    if [ "$size" -gt 100000000 ]; then
      # Size in MiB with one (truncated) decimal, computed with shell
      # integer arithmetic so no external calculator is required.
      ok "Already downloaded ($((size / 1048576)).$((size % 1048576 * 10 / 1048576)) MB)"
      echo ""
      return 0
    fi
  fi

  for f in config.json generation_config.json; do
    download_file "$repo_url/$f" "$dir/$f" "$f"
  done

  download_file "$repo_url/model.safetensors" \
    "$weights" "model.safetensors (~1.2 GB)"

  ok "Qwen3-TTS 0.6B downloaded"
  echo ""
}

# ── Run downloads ────────────────────────────────────────────
# run_downloads [snac|qwen|all]
#   Run the download steps selected by the first argument (default: all).
#     snac  SNAC decoder only
#     qwen  Qwen3-TTS tokenizer, then the 0.6B model
#     all   everything, in the order above
#   Any other value prints the usage line on stderr and exits with status 1.
#   Arguments after the first are ignored.
run_downloads() {
  case "${1:-all}" in
    snac)
      download_snac
      ;;
    qwen)
      download_qwen_tokenizer
      download_qwen_model
      ;;
    all)
      download_snac
      download_qwen_tokenizer
      download_qwen_model
      ;;
    *)
      # Usage errors are diagnostics, so they go to stderr.
      echo "Usage: bash download-tts-models.sh [snac|qwen|all]" >&2
      exit 1
      ;;
  esac
}

run_downloads "$@"

# ── Summary ──────────────────────────────────────────────────
# print_summary
#   Print the completion banner, the disk usage of each entry directly under
#   $MODELS_DIR (one per line, prefixed with a space), and the follow-up test
#   commands.
print_summary() {
  printf '%s\n' "=== Downloads complete ===" "" "Disk usage:"

  # ${MODELS_DIR:?} stops the expansion when MODELS_DIR is unset or empty;
  # without it the glob would become "/*" and du would walk the whole
  # filesystem root. du errors (e.g. an empty models directory) are ignored.
  du -sh "${MODELS_DIR:?}"/* 2>/dev/null | sed 's/^/ /'

  printf '%s\n' \
    "" \
    "Test commands:" \
    " .venv-qwen-tts/bin/python test_orpheus_tts.py # Orpheus via Ollama" \
    " .venv-qwen-tts/bin/python test_qwen_tts.py # Qwen3-TTS direct"
}

print_summary