ci: update CI/CD configuration
This commit is contained in:
parent
14c7883d2a
commit
f85b455eb5
5
.gitignore
vendored
5
.gitignore
vendored
@ -14,3 +14,8 @@ coverage/
|
|||||||
*.key
|
*.key
|
||||||
kv.txt
|
kv.txt
|
||||||
kv_azure.txt
|
kv_azure.txt
|
||||||
|
|
||||||
|
# Local LLM models & venvs
|
||||||
|
__LOCAL_LLMs/models/
|
||||||
|
__LOCAL_LLMs/.venv-*/
|
||||||
|
__LOCAL_LLMs/*.wav
|
||||||
|
|||||||
@ -0,0 +1,267 @@
|
|||||||
|
'use client';
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react';
|
||||||
|
import { RefreshCw, Cpu, HardDrive, Archive, Layers, Zap } from 'lucide-react';
|
||||||
|
import { formatBytes } from '../../../lib/format';
|
||||||
|
import { ProgressBar } from '../../../components/ProgressBar';
|
||||||
|
|
||||||
|
/**
 * Per-category memory figures shown in the drilldown.
 *
 * One numeric field per key of `CATEGORY_META`; the values are passed to
 * `formatBytes` and divided by `totalRam`, i.e. they are treated as byte counts.
 */
interface VmCategory {
  /** Pages recently used by apps. */
  active: number;
  /** Kernel and driver memory that cannot be paged out. */
  wired: number;
  /** Memory occupied by the compressor (still counts as used). */
  compressor: number;
  /** Recently freed memory, reclaimable on demand. */
  inactive: number;
  /** Cache that can be discarded immediately. */
  purgeable: number;
  /** Pre-fetched pages, reclaimable. */
  speculative: number;
  /** Unused memory, immediately available. */
  free: number;
}
|
||||||
|
|
||||||
|
/**
 * One row of the "Top Processes by Memory" list: all processes that share a
 * name, as delivered in the `processes` array of the memory API response.
 */
interface GroupedProcess {
  /** Process name the group is keyed by. */
  name: string;
  /** Resident set size of the group; rendered with `formatBytes`. */
  rss: number;
  /** Memory percentage reported for the group (not read by this component). */
  pctMem: number;
  /** Number of processes in the group; shown as "×N" when greater than 1. */
  count: number;
  /** PIDs of the processes in the group. */
  pids: number[];
}
|
||||||
|
|
||||||
|
/**
 * The subset of the `/api/system/memory` JSON payload that the drilldown reads.
 */
interface MemoryDrilldownData {
  /** Total RAM; the denominator for every percentage in the panel. */
  totalRam: number;
  /** Memory figures per category. */
  categories: VmCategory;
  /** Processes grouped by name; the panel renders the first 15. */
  processes: GroupedProcess[];
}
|
||||||
|
|
||||||
|
/** Presentation details for a single memory category. */
type CategoryMeta = {
  /** Short name shown in the stacked bar and legend. */
  label: string;
  /** CSS colour (a theme custom property) used for the swatch and bar segment. */
  color: string;
  /** Tooltip text shown on the legend row. */
  description: string;
};

/**
 * Display metadata for every `VmCategory` key.
 *
 * The key order matters: the stacked bar and the legend iterate
 * `Object.keys(CATEGORY_META)`, so entries render in the order written here.
 */
const CATEGORY_META: Record<keyof VmCategory, CategoryMeta> = {
  active: {
    label: 'Active',
    color: 'var(--accent-primary)',
    description: 'Pages recently used by apps',
  },
  wired: {
    label: 'Wired',
    color: 'var(--danger)',
    description: 'Kernel & drivers — cannot be paged out',
  },
  compressor: {
    label: 'Compressed',
    color: 'var(--warning)',
    description: 'Pages compressed to save RAM (still counts as used)',
  },
  inactive: {
    label: 'Inactive',
    color: 'var(--accent-secondary)',
    description: 'Recently freed — reclaimable on demand',
  },
  purgeable: {
    label: 'Purgeable',
    color: 'var(--purple)',
    description: 'Cache that macOS can discard immediately',
  },
  speculative: {
    label: 'Speculative',
    color: 'var(--text-tertiary)',
    description: 'Pre-fetched pages — reclaimable',
  },
  free: {
    label: 'Free',
    color: 'var(--success)',
    description: 'Unused pages — immediately available',
  },
};
|
||||||
|
|
||||||
|
/**
 * Memory drilldown panel.
 *
 * Loads `/api/system/memory` once on mount (and again when the refresh button
 * is clicked) and renders:
 *  - a stacked bar of the memory categories,
 *  - a legend with the size and share of each category,
 *  - an "App memory" summary (active + wired + compressed),
 *  - the first 15 entries of the process list with a usage bar each.
 *
 * Shows a spinner during the first load and renders nothing if no data could
 * be loaded.
 */
export function MemoryDrilldown() {
  const [data, setData] = useState<MemoryDrilldownData | null>(null);
  const [loading, setLoading] = useState(true);

  // Loads a fresh snapshot. Failures are intentionally non-fatal: a network
  // error or non-OK response leaves the previous snapshot (if any) on screen.
  const fetchData = async () => {
    setLoading(true);
    try {
      const res = await fetch('/api/system/memory');
      if (res.ok) setData(await res.json());
    } catch {
      // ignore
    } finally {
      setLoading(false);
    }
  };

  useEffect(() => {
    fetchData();
  }, []);

  if (loading && !data) {
    return (
      <div className="flex items-center justify-center py-6">
        <RefreshCw className="w-4 h-4 animate-spin" style={{ color: 'var(--text-tertiary)' }} />
      </div>
    );
  }
  if (!data) return null;

  const total = data.totalRam;
  const cats = data.categories;
  const appMemory = cats.active + cats.wired + cats.compressor;

  // Render order of the categories follows the key order of CATEGORY_META.
  const categoryKeys = Object.keys(CATEGORY_META) as (keyof VmCategory)[];

  // Share of total RAM in percent. Falls back to 0 when the total is zero or
  // missing so that no "NaN%" / "Infinity%" ends up in widths and labels.
  const percentOfTotal = (bytes: number): number => (total > 0 ? (bytes / total) * 100 : 0);

  return (
    <div className="space-y-4">
      {/* Category breakdown header */}
      <div className="flex items-center justify-between">
        <span className="text-xs font-semibold" style={{ color: 'var(--text-secondary)' }}>
          Memory Categories (vm_stat)
        </span>
        <button
          onClick={fetchData}
          disabled={loading}
          className="p-1 rounded transition-colors"
          style={{ color: 'var(--text-tertiary)' }}
          title="Refresh"
        >
          <RefreshCw className={`w-3.5 h-3.5 ${loading ? 'animate-spin' : ''}`} />
        </button>
      </div>

      {/* Stacked bar */}
      <div
        className="flex w-full h-6 rounded-md overflow-hidden"
        style={{ background: 'var(--surface-muted)' }}
      >
        {categoryKeys.map(key => {
          const bytes = cats[key];
          const pct = percentOfTotal(bytes);
          // Slivers below 0.3% are not drawn at all.
          if (pct < 0.3) return null;
          const meta = CATEGORY_META[key];
          return (
            <div
              key={key}
              className="h-full flex items-center justify-center text-[9px] font-medium overflow-hidden shrink-0"
              style={{
                width: `${pct}%`,
                background: meta.color,
                color: 'var(--bg-canvas)',
                opacity: 0.85,
              }}
              title={`${meta.label}: ${formatBytes(bytes)} (${pct.toFixed(1)}%)`}
            >
              {/* Only segments wider than 6% get an inline label */}
              {pct > 6 ? meta.label : ''}
            </div>
          );
        })}
      </div>

      {/* Legend grid */}
      <div className="grid grid-cols-2 gap-x-4 gap-y-1.5">
        {categoryKeys.map(key => {
          const bytes = cats[key];
          const pct = percentOfTotal(bytes);
          const meta = CATEGORY_META[key];
          // The three categories that make up "App memory" get a stronger text colour.
          const isApp = key === 'active' || key === 'wired' || key === 'compressor';
          return (
            <div key={key} className="flex items-center justify-between" title={meta.description}>
              <div className="flex items-center gap-1.5">
                <span
                  className="w-2.5 h-2.5 rounded-sm inline-block shrink-0"
                  style={{ background: meta.color, opacity: 0.85 }}
                />
                <span
                  className="text-[11px]"
                  style={{ color: isApp ? 'var(--text-secondary)' : 'var(--text-tertiary)' }}
                >
                  {meta.label}
                </span>
              </div>
              <span className="text-[11px] font-mono" style={{ color: 'var(--text-tertiary)' }}>
                {formatBytes(bytes)}
                <span className="ml-1 text-[9px]">({pct.toFixed(1)}%)</span>
              </span>
            </div>
          );
        })}
      </div>

      {/* Summary line */}
      <div
        className="flex items-center justify-between px-2 py-1.5 rounded-md text-[11px]"
        style={{ background: 'var(--surface-muted)' }}
      >
        <span style={{ color: 'var(--text-secondary)' }}>
          <strong>App memory</strong> (active + wired + compressed)
        </span>
        <span className="font-mono font-semibold" style={{ color: 'var(--text-primary)' }}>
          {formatBytes(appMemory)}
        </span>
      </div>

      {/* Top processes */}
      <div>
        <span className="text-xs font-semibold" style={{ color: 'var(--text-secondary)' }}>
          Top Processes by Memory
        </span>
      </div>
      <div className="space-y-1.5">
        {data.processes.slice(0, 15).map((proc, i) => {
          const pct = percentOfTotal(proc.rss);
          const lowerName = proc.name.toLowerCase();
          const isOllama = lowerName.includes('ollama');
          const isNode = lowerName.includes('node') || lowerName.includes('next');
          return (
            <div key={`${proc.name}-${i}`}>
              <div className="flex items-center justify-between mb-0.5">
                <div className="flex items-center gap-1.5 min-w-0">
                  {isOllama ? (
                    <Zap className="w-3 h-3 shrink-0" style={{ color: 'var(--success)' }} />
                  ) : isNode ? (
                    <Layers
                      className="w-3 h-3 shrink-0"
                      style={{ color: 'var(--accent-secondary)' }}
                    />
                  ) : (
                    <Cpu className="w-3 h-3 shrink-0" style={{ color: 'var(--text-tertiary)' }} />
                  )}
                  <span
                    className="text-[11px] font-mono truncate"
                    style={{
                      color: isOllama
                        ? 'var(--success)'
                        : isNode
                          ? 'var(--accent-secondary)'
                          : 'var(--text-secondary)',
                    }}
                  >
                    {proc.name}
                    {proc.count > 1 && (
                      <span style={{ color: 'var(--text-tertiary)' }}> ×{proc.count}</span>
                    )}
                  </span>
                </div>
                <span
                  className="text-[11px] font-mono shrink-0 ml-2"
                  style={{ color: 'var(--text-tertiary)' }}
                >
                  {formatBytes(proc.rss)}
                  <span className="ml-1 text-[9px]">({pct.toFixed(1)}%)</span>
                </span>
              </div>
              <div
                className="h-1.5 rounded-full overflow-hidden"
                style={{ background: 'var(--surface-muted)' }}
              >
                <div
                  className="h-full rounded-full"
                  style={{
                    // Keep at least a 0.5% sliver so tiny processes stay visible.
                    width: `${Math.max(0.5, pct)}%`,
                    background: isOllama
                      ? 'var(--success)'
                      : isNode
                        ? 'var(--accent-secondary)'
                        : 'var(--accent-primary)',
                    opacity: 0.7,
                  }}
                />
              </div>
            </div>
          );
        })}
      </div>
    </div>
  );
}
|
||||||
@ -36,6 +36,7 @@ import {
|
|||||||
Star,
|
Star,
|
||||||
MessageSquare,
|
MessageSquare,
|
||||||
Settings,
|
Settings,
|
||||||
|
Volume2,
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import type {
|
import type {
|
||||||
OllamaData,
|
OllamaData,
|
||||||
@ -57,6 +58,7 @@ import { ProgressBar } from '../../components/ProgressBar';
|
|||||||
import { Sparkline } from '../../components/Sparkline';
|
import { Sparkline } from '../../components/Sparkline';
|
||||||
import { RamBudgetBar } from './components/RamBudgetBar';
|
import { RamBudgetBar } from './components/RamBudgetBar';
|
||||||
import { MarkdownResponse } from './components/MarkdownResponse';
|
import { MarkdownResponse } from './components/MarkdownResponse';
|
||||||
|
import { MemoryDrilldown } from './components/MemoryDrilldown';
|
||||||
|
|
||||||
export default function Dashboard() {
|
export default function Dashboard() {
|
||||||
const [ollama, setOllama] = useState<OllamaData | null>(null);
|
const [ollama, setOllama] = useState<OllamaData | null>(null);
|
||||||
@ -129,6 +131,19 @@ export default function Dashboard() {
|
|||||||
>([]);
|
>([]);
|
||||||
const [showInferenceLog, setShowInferenceLog] = useState(false);
|
const [showInferenceLog, setShowInferenceLog] = useState(false);
|
||||||
const [inferenceSearch, setInferenceSearch] = useState('');
|
const [inferenceSearch, setInferenceSearch] = useState('');
|
||||||
|
const [showMemoryDrilldown, setShowMemoryDrilldown] = useState(false);
|
||||||
|
const [ttsData, setTtsData] = useState<{
|
||||||
|
engines: Array<{
|
||||||
|
name: string;
|
||||||
|
type: 'ollama' | 'python';
|
||||||
|
status: 'ready' | 'partial' | 'missing';
|
||||||
|
model: string;
|
||||||
|
size?: string;
|
||||||
|
voices?: string[];
|
||||||
|
details: string;
|
||||||
|
}>;
|
||||||
|
venv: { exists: boolean; packages?: string[] };
|
||||||
|
} | null>(null);
|
||||||
const responseRef = useRef<HTMLDivElement>(null);
|
const responseRef = useRef<HTMLDivElement>(null);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
const compareAbortRef = useRef<AbortController | null>(null);
|
const compareAbortRef = useRef<AbortController | null>(null);
|
||||||
@ -158,6 +173,13 @@ export default function Dashboard() {
|
|||||||
setMemoryHistory(prev => [...prev.slice(-29), sRes.value.memory.appMemory]);
|
setMemoryHistory(prev => [...prev.slice(-29), sRes.value.memory.appMemory]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// TTS engine status
|
||||||
|
try {
|
||||||
|
const tRes = await fetch('/api/tts');
|
||||||
|
if (tRes.ok) setTtsData(await tRes.json());
|
||||||
|
} catch {
|
||||||
|
/* ignore */
|
||||||
|
}
|
||||||
// F15: Check extraction service health via server-side proxy (avoids browser CORS/console errors)
|
// F15: Check extraction service health via server-side proxy (avoids browser CORS/console errors)
|
||||||
try {
|
try {
|
||||||
const eRes = await fetch('/api/extraction/health');
|
const eRes = await fetch('/api/extraction/health');
|
||||||
@ -1143,21 +1165,33 @@ export default function Dashboard() {
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="card p-4">
|
<div
|
||||||
|
className="card p-4 cursor-pointer transition-all"
|
||||||
|
onClick={() => setShowMemoryDrilldown(prev => !prev)}
|
||||||
|
style={{
|
||||||
|
outline: showMemoryDrilldown ? '2px solid var(--warning)' : 'none',
|
||||||
|
outlineOffset: '-1px',
|
||||||
|
}}
|
||||||
|
title="Click to see memory drilldown"
|
||||||
|
>
|
||||||
<div className="flex items-center gap-2 mb-2">
|
<div className="flex items-center gap-2 mb-2">
|
||||||
<MemoryStick className="w-4 h-4" style={{ color: 'var(--warning)' }} />
|
<MemoryStick className="w-4 h-4" style={{ color: 'var(--warning)' }} />
|
||||||
<span className="text-xs font-medium" style={{ color: 'var(--text-tertiary)' }}>
|
<span className="text-xs font-medium" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
MEMORY
|
MEMORY
|
||||||
</span>
|
</span>
|
||||||
|
<span className="text-[9px] ml-auto" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
|
{showMemoryDrilldown ? '▲ hide' : '▼ drilldown'}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<span className="text-lg font-bold">
|
<span className="text-lg font-bold">
|
||||||
{formatBytes(system?.memory.appMemory || 0)}
|
{formatBytes(system?.memory.appMemory || 0)}
|
||||||
</span>
|
</span>
|
||||||
<span className="text-sm ml-1" style={{ color: 'var(--text-tertiary)' }}>
|
<span className="text-sm ml-1" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
/ {formatBytes(system?.memory.total || 0)}
|
used / {formatBytes(system?.memory.total || 0)}
|
||||||
</span>
|
</span>
|
||||||
<p className="text-[10px] mt-0.5" style={{ color: 'var(--text-tertiary)' }}>
|
<p className="text-[10px] mt-0.5 font-medium" style={{ color: 'var(--success)' }}>
|
||||||
{formatBytes(system?.memory.cached || 0)} cached (reclaimable)
|
{formatBytes((system?.memory.free || 0) + (system?.memory.cached || 0) * 0.9)}{' '}
|
||||||
|
available for models
|
||||||
</p>
|
</p>
|
||||||
<div className="mt-2">
|
<div className="mt-2">
|
||||||
<ProgressBar
|
<ProgressBar
|
||||||
@ -1189,6 +1223,17 @@ export default function Dashboard() {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Memory Drilldown Panel */}
|
||||||
|
{showMemoryDrilldown && (
|
||||||
|
<div className="card p-6">
|
||||||
|
<h2 className="text-lg font-semibold flex items-center gap-2 mb-4">
|
||||||
|
<MemoryStick className="w-5 h-5" style={{ color: 'var(--warning)' }} />
|
||||||
|
Memory Drilldown
|
||||||
|
</h2>
|
||||||
|
<MemoryDrilldown />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Main Grid */}
|
{/* Main Grid */}
|
||||||
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||||
{/* Ollama Models — 2 cols */}
|
{/* Ollama Models — 2 cols */}
|
||||||
@ -1351,7 +1396,7 @@ export default function Dashboard() {
|
|||||||
totalRam={system.memory.total}
|
totalRam={system.memory.total}
|
||||||
appMemory={system.memory.appMemory}
|
appMemory={system.memory.appMemory}
|
||||||
runningModels={ollama.running}
|
runningModels={ollama.running}
|
||||||
freeRam={system.memory.free}
|
freeRam={system.memory.free + system.memory.cached}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
{ollama.models
|
{ollama.models
|
||||||
@ -1456,20 +1501,36 @@ export default function Dashboard() {
|
|||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
{/* Metrics row */}
|
||||||
<div
|
<div
|
||||||
className="flex items-center gap-3 text-xs mt-0.5 flex-wrap"
|
className="flex items-center gap-2 text-xs mt-1 flex-wrap"
|
||||||
style={{ color: 'var(--text-tertiary)' }}
|
style={{ color: 'var(--text-tertiary)' }}
|
||||||
>
|
>
|
||||||
<span>{formatBytes(model.size)}</span>
|
<span className="inline-flex items-center gap-1" title="Disk size">
|
||||||
|
<HardDrive className="w-3 h-3" />
|
||||||
|
{formatBytes(model.size)}
|
||||||
|
</span>
|
||||||
{model.details?.parameter_size && (
|
{model.details?.parameter_size && (
|
||||||
<span>{model.details.parameter_size}</span>
|
<span
|
||||||
|
className="inline-flex items-center gap-1"
|
||||||
|
title="Parameter count"
|
||||||
|
>
|
||||||
|
<Cpu className="w-3 h-3" />
|
||||||
|
{model.details.parameter_size}
|
||||||
|
</span>
|
||||||
)}
|
)}
|
||||||
{model.details?.quantization_level && (
|
{model.details?.quantization_level && (
|
||||||
<span>{model.details.quantization_level}</span>
|
<span
|
||||||
)}
|
className="px-1.5 py-0.5 rounded font-mono text-[10px]"
|
||||||
<span title="Estimated RAM when loaded (Apple Silicon unified memory)">
|
style={{
|
||||||
~{formatBytes(estRam)} RAM
|
background: 'var(--surface-card)',
|
||||||
|
color: 'var(--text-tertiary)',
|
||||||
|
}}
|
||||||
|
title="Quantization level — lower bits = smaller & faster but less accurate"
|
||||||
|
>
|
||||||
|
{model.details.quantization_level}
|
||||||
</span>
|
</span>
|
||||||
|
)}
|
||||||
{(() => {
|
{(() => {
|
||||||
const ctx = modelMetadata[model.name]?.contextLength;
|
const ctx = modelMetadata[model.name]?.contextLength;
|
||||||
return ctx ? (
|
return ctx ? (
|
||||||
@ -1486,7 +1547,86 @@ export default function Dashboard() {
|
|||||||
~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
|
~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
{(() => {
|
||||||
|
const ps = parseFloat(model.details?.parameter_size || '0');
|
||||||
|
const tier =
|
||||||
|
ps <= 3
|
||||||
|
? { label: 'Tiny · Instant', color: 'var(--success)' }
|
||||||
|
: ps <= 8
|
||||||
|
? { label: 'Small · Fast', color: 'var(--accent-secondary)' }
|
||||||
|
: ps <= 14
|
||||||
|
? { label: 'Medium', color: 'var(--accent-primary)' }
|
||||||
|
: ps <= 34
|
||||||
|
? { label: 'Large · Slow', color: 'var(--warning)' }
|
||||||
|
: { label: 'XL · Very Slow', color: 'var(--danger)' };
|
||||||
|
return (
|
||||||
|
<span
|
||||||
|
className="text-[10px] px-1.5 py-0.5 rounded font-medium"
|
||||||
|
style={{
|
||||||
|
background: `color-mix(in srgb, ${tier.color} 12%, transparent)`,
|
||||||
|
color: tier.color,
|
||||||
|
}}
|
||||||
|
title="Speed tier based on parameter count"
|
||||||
|
>
|
||||||
|
{tier.label}
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
})()}
|
||||||
</div>
|
</div>
|
||||||
|
{/* Memory fit — only for non-running models */}
|
||||||
|
{!running &&
|
||||||
|
system &&
|
||||||
|
(() => {
|
||||||
|
const avail = system.memory.free + system.memory.cached * 0.9;
|
||||||
|
const gap = avail - estRam;
|
||||||
|
const fitColor =
|
||||||
|
fitStatus === 'fits'
|
||||||
|
? 'var(--success)'
|
||||||
|
: fitStatus === 'tight'
|
||||||
|
? 'var(--warning)'
|
||||||
|
: 'var(--danger)';
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
className="mt-2 p-2 rounded-md"
|
||||||
|
style={{ background: 'var(--surface-card)' }}
|
||||||
|
>
|
||||||
|
<div className="flex items-center justify-between mb-1">
|
||||||
|
<span
|
||||||
|
className="text-[11px]"
|
||||||
|
style={{ color: 'var(--text-tertiary)' }}
|
||||||
|
>
|
||||||
|
Needs ~{formatBytes(estRam)} · {formatBytes(avail)}{' '}
|
||||||
|
available
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
className="text-[10px] px-1.5 py-0.5 rounded-full font-medium"
|
||||||
|
style={{
|
||||||
|
background: `color-mix(in srgb, ${fitColor} 15%, transparent)`,
|
||||||
|
color: fitColor,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{fitStatus === 'fits'
|
||||||
|
? `✓ ${formatBytes(gap)} to spare`
|
||||||
|
: fitStatus === 'tight'
|
||||||
|
? `⚠ Tight — ${formatBytes(gap)} to spare`
|
||||||
|
: `✗ ${formatBytes(Math.abs(gap))} short`}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
className="h-1.5 rounded-full overflow-hidden"
|
||||||
|
style={{ background: 'var(--surface-muted)' }}
|
||||||
|
>
|
||||||
|
<div
|
||||||
|
className="h-full rounded-full transition-all"
|
||||||
|
style={{
|
||||||
|
width: `${Math.min(100, Math.round((estRam / avail) * 100))}%`,
|
||||||
|
background: fitColor,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})()}
|
||||||
{running &&
|
{running &&
|
||||||
(() => {
|
(() => {
|
||||||
const rm = ollama?.running.find(r => r.name === model.name);
|
const rm = ollama?.running.find(r => r.name === model.name);
|
||||||
@ -1547,26 +1687,6 @@ export default function Dashboard() {
|
|||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
{fitStatus && !running && (
|
|
||||||
<span
|
|
||||||
className="w-2 h-2 rounded-full shrink-0"
|
|
||||||
title={
|
|
||||||
fitStatus === 'fits'
|
|
||||||
? 'Fits comfortably in available memory'
|
|
||||||
: fitStatus === 'tight'
|
|
||||||
? 'Tight — may cause swap pressure'
|
|
||||||
: "Won't fit — will swap heavily"
|
|
||||||
}
|
|
||||||
style={{
|
|
||||||
background:
|
|
||||||
fitStatus === 'fits'
|
|
||||||
? 'var(--success)'
|
|
||||||
: fitStatus === 'tight'
|
|
||||||
? 'var(--warning)'
|
|
||||||
: 'var(--danger)',
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
<button
|
<button
|
||||||
onClick={() => handleModelAction('load', model.name)}
|
onClick={() => handleModelAction('load', model.name)}
|
||||||
disabled={actionLoading === `load-${model.name}`}
|
disabled={actionLoading === `load-${model.name}`}
|
||||||
@ -1757,7 +1877,7 @@ export default function Dashboard() {
|
|||||||
(() => {
|
(() => {
|
||||||
const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
|
const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
|
||||||
const freeForModels =
|
const freeForModels =
|
||||||
system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1;
|
system.memory.free + system.memory.cached * 0.9 - usedVram * 0.1;
|
||||||
const suggestions = ollama.models
|
const suggestions = ollama.models
|
||||||
.filter(m => !isRunning(m.name))
|
.filter(m => !isRunning(m.name))
|
||||||
.map(m => ({
|
.map(m => ({
|
||||||
@ -1831,8 +1951,9 @@ export default function Dashboard() {
|
|||||||
RAM
|
RAM
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<span className="text-xs font-mono" style={{ color: 'var(--text-tertiary)' }}>
|
<span className="text-xs font-mono" style={{ color: 'var(--success)' }}>
|
||||||
{formatBytes(system?.memory.free || 0)} avail
|
{formatBytes((system?.memory.free || 0) + (system?.memory.cached || 0) * 0.9)}{' '}
|
||||||
|
avail
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<ProgressBar
|
<ProgressBar
|
||||||
@ -1850,8 +1971,8 @@ export default function Dashboard() {
|
|||||||
className="flex justify-between mt-1 text-[10px]"
|
className="flex justify-between mt-1 text-[10px]"
|
||||||
style={{ color: 'var(--text-tertiary)' }}
|
style={{ color: 'var(--text-tertiary)' }}
|
||||||
>
|
>
|
||||||
<span>App: {formatBytes(system?.memory.appMemory || 0)}</span>
|
<span>Used: {formatBytes(system?.memory.appMemory || 0)}</span>
|
||||||
<span>Cache: {formatBytes(system?.memory.cached || 0)}</span>
|
<span>Total: {formatBytes(system?.memory.total || 0)}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
@ -2024,6 +2145,116 @@ export default function Dashboard() {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Speech — TTS Engines */}
|
||||||
|
<div className="card p-6">
|
||||||
|
<h2 className="text-lg font-semibold flex items-center gap-2 mb-4">
|
||||||
|
<Volume2 className="w-5 h-5" style={{ color: 'var(--accent-primary)' }} />
|
||||||
|
Speech (TTS)
|
||||||
|
</h2>
|
||||||
|
{ttsData ? (
|
||||||
|
<div className="space-y-3">
|
||||||
|
{ttsData.engines.map(engine => (
|
||||||
|
<div
|
||||||
|
key={engine.name}
|
||||||
|
className="p-3 rounded-lg"
|
||||||
|
style={{ background: 'var(--surface-muted)' }}
|
||||||
|
>
|
||||||
|
<div className="flex items-center justify-between mb-1">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<StatusDot
|
||||||
|
status={
|
||||||
|
engine.status === 'ready'
|
||||||
|
? 'online'
|
||||||
|
: engine.status === 'partial'
|
||||||
|
? 'warning'
|
||||||
|
: 'offline'
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
<span className="text-sm font-semibold">{engine.name}</span>
|
||||||
|
<span
|
||||||
|
className="text-[10px] px-1.5 py-0.5 rounded font-mono"
|
||||||
|
style={{
|
||||||
|
background:
|
||||||
|
engine.type === 'ollama' ? 'var(--accent-primary)' : 'var(--purple)',
|
||||||
|
color: '#fff',
|
||||||
|
opacity: 0.85,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{engine.type === 'ollama' ? 'Ollama' : 'Python'}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{engine.size && (
|
||||||
|
<span
|
||||||
|
className="text-[11px] font-mono"
|
||||||
|
style={{ color: 'var(--text-tertiary)' }}
|
||||||
|
>
|
||||||
|
{engine.size}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<p className="text-xs ml-5" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
|
{engine.model}
|
||||||
|
</p>
|
||||||
|
<p
|
||||||
|
className="text-xs ml-5 mt-0.5"
|
||||||
|
style={{
|
||||||
|
color:
|
||||||
|
engine.status === 'ready'
|
||||||
|
? 'var(--success)'
|
||||||
|
: engine.status === 'partial'
|
||||||
|
? 'var(--warning)'
|
||||||
|
: 'var(--text-tertiary)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{engine.details}
|
||||||
|
</p>
|
||||||
|
{engine.voices && engine.status === 'ready' && (
|
||||||
|
<div className="flex flex-wrap gap-1 mt-2 ml-5">
|
||||||
|
{engine.voices.map(v => (
|
||||||
|
<span
|
||||||
|
key={v}
|
||||||
|
className="text-[10px] px-1.5 py-0.5 rounded font-mono"
|
||||||
|
style={{
|
||||||
|
background: 'var(--bg-elevated)',
|
||||||
|
color: 'var(--text-secondary)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{v}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
{/* Venv status */}
|
||||||
|
<div
|
||||||
|
className="flex items-center justify-between text-xs pt-2"
|
||||||
|
style={{ borderTop: '1px solid var(--border-subtle)' }}
|
||||||
|
>
|
||||||
|
<span style={{ color: 'var(--text-tertiary)' }}>Python venv</span>
|
||||||
|
<span
|
||||||
|
style={{ color: ttsData.venv.exists ? 'var(--success)' : 'var(--warning)' }}
|
||||||
|
>
|
||||||
|
{ttsData.venv.exists ? (
|
||||||
|
<>✓ {ttsData.venv.packages?.join(' · ') || 'installed'}</>
|
||||||
|
) : (
|
||||||
|
'Not found — run setup-tts.sh'
|
||||||
|
)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div
|
||||||
|
className="p-3 rounded-lg text-center"
|
||||||
|
style={{ background: 'var(--surface-muted)' }}
|
||||||
|
>
|
||||||
|
<p className="text-xs" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
|
Loading TTS status...
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Extraction Service (F15) */}
|
{/* Extraction Service (F15) */}
|
||||||
<div className="card p-6">
|
<div className="card p-6">
|
||||||
<h2 className="text-lg font-semibold flex items-center gap-2 mb-4">
|
<h2 className="text-lg font-semibold flex items-center gap-2 mb-4">
|
||||||
|
|||||||
136
__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
Normal file
136
__LOCAL_LLMs/dashboard/src/app/api/system/memory/route.ts
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
import { NextResponse } from 'next/server';
|
||||||
|
import { exec } from 'child_process';
|
||||||
|
import { promisify } from 'util';
|
||||||
|
import os from 'os';
|
||||||
|
|
||||||
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
|
/** A single row parsed from the `ps` output. */
interface ProcessInfo {
  /** Process id (`pid` column). */
  pid: number;
  /** Last path segment of the `comm` column. */
  name: string;
  /** Resident set size in bytes (the `rss` column multiplied by 1024). */
  rss: number;
  /** Value of the `%mem` column. */
  pctMem: number;
  /** Value of the `user` column. */
  user: string;
}
|
||||||
|
|
||||||
|
/**
 * Memory categories parsed from `vm_stat`.
 *
 * Every category field is the page count reported by `vm_stat` multiplied by
 * `pageSize`, i.e. a byte count.
 */
interface VmStatBreakdown {
  /** From the "Pages active" line. */
  active: number;
  /** From the "Pages wired down" line. */
  wired: number;
  /** From the "Pages occupied by compressor" line. */
  compressor: number;
  /** From the "Pages inactive" line. */
  inactive: number;
  /** From the "Pages purgeable" line. */
  purgeable: number;
  /** From the "Pages speculative" line. */
  speculative: number;
  /** From the "Pages free" line. */
  free: number;
  /** Page size in bytes as stated in the `vm_stat` header (16384 when absent). */
  pageSize: number;
}
|
||||||
|
|
||||||
|
/**
 * Lists the processes with the largest resident set size.
 *
 * Runs `ps`, sorts by the RSS column (descending) and keeps the first `limit`
 * lines. Rows whose RSS does not parse to a positive number are dropped.
 *
 * @param limit Maximum number of `ps` rows to read (default 20).
 * @returns The parsed rows, or an empty array if the command fails or times
 *          out (3 s).
 */
async function getTopProcesses(limit = 20): Promise<ProcessInfo[]> {
  try {
    // ps with RSS in KB, sorted descending by RSS
    const command = `ps -axo pid=,rss=,%mem=,user=,comm= | sort -k2 -rn | head -${limit}`;
    const { stdout } = await execAsync(command, { timeout: 3000 });

    const result: ProcessInfo[] = [];
    for (const line of stdout.trim().split('\n')) {
      if (!line) continue;

      // comm can contain spaces, so everything after the user column belongs to it
      const [pidField, rssField, memField, user, ...commParts] = line.trim().split(/\s+/);
      const rawName = commParts.join(' ');

      // Reduce a full executable path to its last segment
      const baseName = rawName.slice(rawName.lastIndexOf('/') + 1);

      const rssBytes = parseInt(rssField) * 1024;
      if (rssBytes > 0) {
        result.push({
          pid: parseInt(pidField),
          name: baseName || rawName,
          rss: rssBytes,
          pctMem: parseFloat(memField),
          user,
        });
      }
    }
    return result;
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
|
/**
 * Reads the memory categories from `vm_stat`.
 *
 * The page size is taken from the "page size of N bytes" text in the output
 * (16384 when that text is missing). Each category is the matching
 * "<label>: <pages>" count multiplied by the page size, or 0 when the line is
 * not found.
 *
 * @returns The breakdown in bytes; an all-zero breakdown with a page size of
 *          16384 if the command fails or times out (2 s).
 */
async function getVmStatBreakdown(): Promise<VmStatBreakdown> {
  try {
    const { stdout } = await execAsync('vm_stat', { timeout: 2000 });

    const sizeMatch = /page size of (\d+) bytes/.exec(stdout);
    let pageSize = 16384;
    if (sizeMatch) {
      pageSize = parseInt(sizeMatch[1]);
    }

    // Looks up "<label>: <count>" and converts the page count to bytes.
    const bytesFor = (label: string): number => {
      const found = new RegExp(`${label}:\\s+(\\d+)`).exec(stdout);
      if (!found) return 0;
      return parseInt(found[1]) * pageSize;
    };

    const active = bytesFor('Pages active');
    const wired = bytesFor('Pages wired down');
    const compressor = bytesFor('Pages occupied by compressor');
    const inactive = bytesFor('Pages inactive');
    const purgeable = bytesFor('Pages purgeable');
    const speculative = bytesFor('Pages speculative');
    const free = bytesFor('Pages free');

    return { active, wired, compressor, inactive, purgeable, speculative, free, pageSize };
  } catch {
    return {
      active: 0,
      wired: 0,
      compressor: 0,
      inactive: 0,
      purgeable: 0,
      speculative: 0,
      free: 0,
      pageSize: 16384,
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * GET handler for the memory drilldown endpoint.
 *
 * Collects the top 25 processes and the `vm_stat` breakdown in parallel,
 * merges processes that share a name, and responds with JSON containing:
 *  - `totalRam`: `os.totalmem()`
 *  - `vmstat`: the full breakdown including `pageSize`
 *  - `categories`: the seven category values from the breakdown
 *  - `processes`: the merged process groups, largest RSS first
 */
export async function GET() {
  const [processes, vmstat] = await Promise.all([getTopProcesses(25), getVmStatBreakdown()]);

  // Group by process name and sum RSS (e.g. multiple Chrome helpers).
  // A Map is used instead of a plain object because the keys come from `ps`
  // output: a name such as "constructor" or "__proto__" would otherwise
  // resolve to an inherited Object.prototype member and break the grouping.
  const grouped = new Map<
    string,
    { rss: number; pctMem: number; count: number; pids: number[] }
  >();
  for (const p of processes) {
    let group = grouped.get(p.name);
    if (!group) {
      group = { rss: 0, pctMem: 0, count: 0, pids: [] };
      grouped.set(p.name, group);
    }
    group.rss += p.rss;
    group.pctMem += p.pctMem;
    group.count += 1;
    group.pids.push(p.pid);
  }

  const groupedProcesses = Array.from(grouped, ([name, info]) => ({
    name,
    rss: info.rss,
    // Summed percentages are rounded to one decimal place
    pctMem: Math.round(info.pctMem * 10) / 10,
    count: info.count,
    pids: info.pids,
  })).sort((a, b) => b.rss - a.rss);

  return NextResponse.json({
    totalRam: os.totalmem(),
    vmstat,
    categories: {
      active: vmstat.active,
      wired: vmstat.wired,
      compressor: vmstat.compressor,
      inactive: vmstat.inactive,
      purgeable: vmstat.purgeable,
      speculative: vmstat.speculative,
      free: vmstat.free,
    },
    processes: groupedProcesses,
  });
}
|
||||||
@ -133,12 +133,13 @@ async function getAccurateMemory(): Promise<{
|
|||||||
|
|
||||||
const appMemory = active + wired + compressor;
|
const appMemory = active + wired + compressor;
|
||||||
const cached = inactive + purgeable + speculative;
|
const cached = inactive + purgeable + speculative;
|
||||||
const trueFree = free + cached; // macOS reclaims cached on demand
|
// Return raw free separately from cached — no overlap
|
||||||
|
// available for loading = free + cached (macOS reclaims cached on demand)
|
||||||
|
|
||||||
const ratio = appMemory / totalMem;
|
const ratio = appMemory / totalMem;
|
||||||
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
|
const pressure = ratio > 0.85 ? 'critical' : ratio > 0.7 ? 'warning' : 'normal';
|
||||||
|
|
||||||
return { total: totalMem, appMemory, cached, free: trueFree, pressure };
|
return { total: totalMem, appMemory, cached, free, pressure };
|
||||||
} catch {
|
} catch {
|
||||||
// Fallback to Node.js (inaccurate on macOS but works everywhere)
|
// Fallback to Node.js (inaccurate on macOS but works everywhere)
|
||||||
const freeMem = os.freemem();
|
const freeMem = os.freemem();
|
||||||
|
|||||||
175
__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
Normal file
175
__LOCAL_LLMs/dashboard/src/app/api/tts/route.ts
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
import { NextResponse } from 'next/server';
|
||||||
|
import { exec } from 'child_process';
|
||||||
|
import { promisify } from 'util';
|
||||||
|
import { access, stat, readdir } from 'fs/promises';
|
||||||
|
import { join, resolve } from 'path';
|
||||||
|
|
||||||
|
// Promise-returning form of child_process.exec so shell commands can be awaited.
const execAsync = promisify(exec);

// The server's working directory is dashboard/; one level up is __LOCAL_LLMs/,
// which is the base for the models/ and .venv-qwen-tts/ paths checked below.
const LOCAL_LLMS_DIR = resolve(process.cwd(), '..');
|
||||||
|
|
||||||
|
/** Installation status report for one text-to-speech engine. */
interface TtsEngine {
  /** Display name of the engine. */
  name: string;
  /** How the engine is run: through Ollama or directly in Python. */
  type: 'ollama' | 'python';
  /** `ready` = all parts found, `partial` = some parts found, `missing` = none usable. */
  status: 'ready' | 'partial' | 'missing';
  /** Model identifier. */
  model: string;
  /** Human-readable size string; only set when the engine is ready. */
  size?: string;
  /** Voice names, when the engine has a fixed list. */
  voices?: string[];
  /** Short explanation of the status (what is present, what is missing, or how to set up). */
  details: string;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `path` can be accessed.
 *
 * @param path File or directory path to probe.
 * @returns `true` when `access` succeeds, `false` on any failure.
 */
async function fileExists(path: string): Promise<boolean> {
  return access(path).then(
    () => true,
    () => false
  );
}
|
||||||
|
|
||||||
|
/**
 * Returns the size of `path` in bytes as reported by `stat`.
 *
 * @param path File path to inspect.
 * @returns The byte size, or 0 when `stat` fails (for example, the file is absent).
 */
async function getFileSize(path: string): Promise<number> {
  return stat(path).then(
    (info) => info.size,
    () => 0
  );
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the Orpheus TTS engine is usable.
 *
 * Three parts are probed in order:
 * 1. Ollama's `/api/tags` on localhost:11434 (2 s timeout) lists a model whose
 *    name contains "orpheus".
 * 2. The SNAC decoder weights exist under `models/snac_24khz/`.
 * 3. The `.venv-qwen-tts` Python interpreter exists.
 *
 * @returns `ready` when all three are present, `partial` when the Ollama model
 *   is present but another part is not, otherwise `missing`.
 */
async function checkOrpheus(): Promise<TtsEngine> {
  // Assembles the report with a fixed key order; `size` is appended only when given.
  const describe = (status: TtsEngine['status'], details: string, size?: string): TtsEngine => {
    const engine: TtsEngine = {
      name: 'Orpheus TTS',
      type: 'ollama',
      status,
      model: 'sematre/orpheus:en',
      voices: ['tara', 'leah', 'jess', 'leo', 'dan', 'mia', 'zac', 'zoe'],
      details,
    };
    if (size !== undefined) engine.size = size;
    return engine;
  };

  // 1. Is the Orpheus model registered with Ollama?
  let modelInstalled = false;
  try {
    const response = await fetch('http://localhost:11434/api/tags', {
      signal: AbortSignal.timeout(2000),
    });
    if (response.ok) {
      const payload = await response.json();
      modelInstalled =
        payload.models?.some((m: { name: string }) => m.name.includes('orpheus')) ?? false;
    }
  } catch {
    // Ollama not running (or the request timed out): treat the model as absent.
  }

  // 2. SNAC decoder weights
  const decoderPath = join(LOCAL_LLMS_DIR, 'models', 'snac_24khz', 'pytorch_model.bin');
  const decoderPresent = await fileExists(decoderPath);
  const decoderBytes = decoderPresent ? await getFileSize(decoderPath) : 0;

  // 3. Python venv interpreter
  const venvPresent = await fileExists(join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python'));

  if (!modelInstalled) {
    return describe('missing', 'Run: bash setup-tts.sh');
  }

  if (decoderPresent && venvPresent) {
    return describe(
      'ready',
      'Ollama model + SNAC decoder + Python venv',
      `${(decoderBytes / 1e6).toFixed(0)} MB decoder`
    );
  }

  const gaps: string[] = [];
  if (!decoderPresent) gaps.push('SNAC decoder');
  if (!venvPresent) gaps.push('Python venv');
  return describe('partial', `Missing: ${gaps.join(', ')}`);
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the Qwen3-TTS engine is usable.
 *
 * Looks for the first `.safetensors` file in the model directory, the
 * tokenizer's `config.json`, and the `.venv-qwen-tts` Python interpreter.
 *
 * @returns `ready` when weights, tokenizer and venv are all present (with the
 *   weights file size in GB), `partial` when weights or tokenizer are present
 *   but something is missing, otherwise `missing`.
 */
async function checkQwenTts(): Promise<TtsEngine> {
  // Assembles the report with a fixed key order; `size` is appended only when given.
  const describe = (status: TtsEngine['status'], details: string, size?: string): TtsEngine => {
    const engine: TtsEngine = {
      name: 'Qwen3-TTS',
      type: 'python',
      status,
      model: 'Qwen3-TTS-12Hz-0.6B-CustomVoice',
      details,
    };
    if (size !== undefined) engine.size = size;
    return engine;
  };

  const modelDir = join(LOCAL_LLMS_DIR, 'models', 'Qwen3-TTS-12Hz-0.6B-CustomVoice');
  const tokenizerDir = join(LOCAL_LLMS_DIR, 'models', 'Qwen3-TTS-Tokenizer-12Hz');

  // Model weights: the first *.safetensors file in the model directory.
  let weightsFile: string | undefined;
  try {
    const entries = await readdir(modelDir);
    weightsFile = entries.find((f) => f.endsWith('.safetensors'));
  } catch {
    // dir doesn't exist
  }
  const weightsPresent = weightsFile !== undefined;
  const weightsBytes =
    weightsFile !== undefined ? await getFileSize(join(modelDir, weightsFile)) : 0;

  const tokenizerPresent = await fileExists(join(tokenizerDir, 'config.json'));
  const venvPresent = await fileExists(join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python'));

  if (weightsPresent && tokenizerPresent && venvPresent) {
    return describe(
      'ready',
      '0.6B params · 10 languages · MPS/CPU',
      `${(weightsBytes / 1e9).toFixed(1)} GB`
    );
  }

  if (!weightsPresent && !tokenizerPresent) {
    return describe('missing', 'Run: bash setup-tts.sh');
  }

  const gaps: string[] = [];
  if (!weightsPresent) gaps.push('model weights');
  if (!tokenizerPresent) gaps.push('tokenizer');
  if (!venvPresent) gaps.push('Python venv');
  return describe('partial', `Missing: ${gaps.join(', ')}`);
}
|
||||||
|
|
||||||
|
/**
 * Inspects the `.venv-qwen-tts` Python virtual environment.
 *
 * When the interpreter exists, it is run (5 s timeout) to import `snac` and
 * `torch` and print their versions. If that command fails, the venv is still
 * reported as existing, just without `packages`.
 *
 * @returns `{ exists: false }` when the interpreter is absent; otherwise the
 *   interpreter path and, when the import probe succeeds, its output split on
 *   spaces (e.g. `snac=<version>`, `torch=<version>`).
 */
async function checkVenv(): Promise<{
  exists: boolean;
  python?: string;
  packages?: string[];
}> {
  const pythonBin = join(LOCAL_LLMS_DIR, '.venv-qwen-tts', 'bin', 'python');
  if (!(await fileExists(pythonBin))) {
    return { exists: false };
  }

  const report: { exists: boolean; python?: string; packages?: string[] } = {
    exists: true,
    python: pythonBin,
  };

  const probe = `"${pythonBin}" -c "import snac; import torch; print(f'snac={snac.__version__} torch={torch.__version__}')"`;
  try {
    const { stdout } = await execAsync(probe, { timeout: 5000 });
    report.packages = stdout.trim().split(' ');
  } catch {
    // Import failed or timed out: leave `packages` unset.
  }

  return report;
}
|
||||||
|
|
||||||
|
/**
 * GET handler for the TTS status endpoint.
 *
 * Runs the Orpheus, Qwen3-TTS and venv checks concurrently and responds with
 * JSON containing the two engine reports, the venv report, the setup script
 * command, and the test command for each engine.
 */
export async function GET() {
  const checks = await Promise.all([checkOrpheus(), checkQwenTts(), checkVenv()]);
  const [orpheusReport, qwenReport, venvReport] = checks;

  const body = {
    engines: [orpheusReport, qwenReport],
    venv: venvReport,
    setupScript: 'bash setup-tts.sh',
    testCommands: {
      orpheus: '.venv-qwen-tts/bin/python test_orpheus_tts.py',
      qwenTts: '.venv-qwen-tts/bin/python test_qwen_tts.py',
    },
  };

  return NextResponse.json(body);
}
|
||||||
@ -19,13 +19,15 @@ export function estimateRam(diskSize: number, quant?: string): number {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// N2: Check if model fits in available memory
|
// N2: Check if model fits in available memory
|
||||||
|
// free = raw free pages, cached = inactive+purgeable+speculative (no overlap)
|
||||||
|
// macOS reclaims ~90% of cached on demand for large allocations (model mmaps)
|
||||||
export type FitStatus = 'fits' | 'tight' | 'no';
|
export type FitStatus = 'fits' | 'tight' | 'no';
|
||||||
export function checkMemoryFit(
|
export function checkMemoryFit(
|
||||||
estimatedRam: number,
|
estimatedRam: number,
|
||||||
freeMemory: number,
|
freeMemory: number,
|
||||||
cachedMemory: number
|
cachedMemory: number
|
||||||
): FitStatus {
|
): FitStatus {
|
||||||
const available = freeMemory + cachedMemory * 0.5;
|
const available = freeMemory + cachedMemory * 0.9;
|
||||||
const ratio = estimatedRam / available;
|
const ratio = estimatedRam / available;
|
||||||
if (ratio < 0.7) return 'fits';
|
if (ratio < 0.7) return 'fits';
|
||||||
if (ratio <= 1.0) return 'tight';
|
if (ratio <= 1.0) return 'tight';
|
||||||
|
|||||||
@ -11,9 +11,12 @@ This machine runs a local LLM server via [Ollama](https://ollama.com), exposing
|
|||||||
**Models installed:**
|
**Models installed:**
|
||||||
|
|
||||||
| Model | Size | Best For |
|
| Model | Size | Best For |
|
||||||
| ------------------- | ------- | ----------------------------------------- |
|
| -------------------- | ------ | -------------------------------------------- |
|
||||||
| `qwen2.5-coder:32b` | 18.5 GB | Code (TS, Python, Swift), structured JSON |
|
| `qwen2.5-coder:32b` | 19 GB | Code (TS, Python, Swift), structured JSON |
|
||||||
| `llama3.1:8b` | 4.7 GB | Fast evals, general tasks |
|
| `qwen2.5-coder:7b` | 4.7 GB | Fast code tasks, fits alongside other models |
|
||||||
|
| `deepseek-r1:32b` | 19 GB | Complex reasoning, chain-of-thought |
|
||||||
|
| `llama3.1:8b` | 4.9 GB | Fast evals, general tasks |
|
||||||
|
| `sematre/orpheus:en` | 4 GB | Text-to-speech (8 voices, emotion tags) |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +1,103 @@
|
|||||||
# 05 — Mission Control Dashboard
|
# 05 — Mission Control Dashboard
|
||||||
|
|
||||||
> **Documentation has moved.** All dashboard docs now live in the dashboard directory.
|
> Next.js 16 dashboard for managing local LLM models, system resources, and inference.
|
||||||
|
> Last updated: 2026-02-21
|
||||||
- **PRD:** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_PRD.md`](../dashboard/docs/DASHBOARD_PRD.md)
|
|
||||||
- **Review (39 items):** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_REVIEW.md`](../dashboard/docs/DASHBOARD_REVIEW.md)
|
|
||||||
- **Roadmap (N1–N15):** [`__LOCAL_LLMs/dashboard/docs/DASHBOARD_ROADMAP.md`](../dashboard/docs/DASHBOARD_ROADMAP.md)
|
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd __LOCAL_LLMs/dashboard
|
cd __LOCAL_LLMs/dashboard
|
||||||
npm install # first time only
|
npm install # first time only
|
||||||
npm run dev -- -p 3100
|
npm run dev # runs on port 3000
|
||||||
```
|
```
|
||||||
|
|
||||||
Open: **http://localhost:3100**
|
Open: **http://localhost:3000**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recent Changes (Feb 2026)
|
||||||
|
|
||||||
|
### Memory Calculation Fix
|
||||||
|
|
||||||
|
**Root cause:** The system API (`/api/system`) computed `trueFree = free + cached` and returned it as `free`. This made `free` and `cached` overlap. The UI then did `available = free + cached * 0.5`, which **double-counted** cached memory and inflated available RAM by ~8 GB.
|
||||||
|
|
||||||
|
**Fix (4 files):**
|
||||||
|
|
||||||
|
- `src/app/api/system/route.ts` — Return raw `Pages free` separately from `cached` (no overlap)
|
||||||
|
- `src/app/lib/format.ts` — Updated `checkMemoryFit()` to use `cached × 0.9` (macOS reclaims ~90% on demand)
|
||||||
|
- `src/app/(mission-control)/mission-control/page.tsx` — All UI memory references fixed
|
||||||
|
- `src/app/(mission-control)/mission-control/components/RamBudgetBar.tsx` — Receives corrected `free + cached`
|
||||||
|
|
||||||
|
**Memory formula:** `available for models = rawFree + cached × 0.9`
|
||||||
|
|
||||||
|
### Memory Drilldown
|
||||||
|
|
||||||
|
Click the **MEMORY** card in the status bar to toggle a drilldown panel showing:
|
||||||
|
|
||||||
|
1. **Stacked bar** — vm_stat categories (Active, Wired, Compressed, Inactive, Purgeable, Speculative, Free)
|
||||||
|
2. **Legend grid** — exact bytes + percentage for each category
|
||||||
|
3. **App memory summary** — Active + Wired + Compressed = total used
|
||||||
|
4. **Top 15 processes by RSS** — grouped by name, Ollama highlighted in green
|
||||||
|
|
||||||
|
**New files:**
|
||||||
|
|
||||||
|
- `src/app/api/system/memory/route.ts` — Process memory API (`ps` + `vm_stat`)
|
||||||
|
- `src/app/(mission-control)/mission-control/components/MemoryDrilldown.tsx` — Drilldown UI
|
||||||
|
|
||||||
|
### Simplified Memory UI
|
||||||
|
|
||||||
|
All memory displays now use consistent, plain language:
|
||||||
|
|
||||||
|
| Element | Before (confusing) | After (clear) |
|
||||||
|
| -------------------- | ---------------------------------- | ------------------------------------------- |
|
||||||
|
| **MEMORY card** | "10.5 GB / 48 GB" (ambiguous) | **"35.6 GB used / 48 GB"** |
|
||||||
|
| **Subtitle** | "App: 35.6 GB · Cache: 11.6 GB" | **"10.5 GB available for models"** (green) |
|
||||||
|
| **Model fit** | "76 MB free + 10.5 GB reclaimable" | **"Needs ~22 GB · 10.5 GB available"** |
|
||||||
|
| **Fit badge** | "✗ Won't fit" | **"✗ 11.6 GB short"** (with exact gap) |
|
||||||
|
| **System panel RAM** | "76 MB avail" | **"10.5 GB avail"** (green, matches header) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Detailed Documentation
|
||||||
|
|
||||||
|
- **PRD:** [`dashboard/docs/DASHBOARD_PRD.md`](../dashboard/docs/DASHBOARD_PRD.md)
|
||||||
|
- **Review (39 items):** [`dashboard/docs/DASHBOARD_REVIEW.md`](../dashboard/docs/DASHBOARD_REVIEW.md)
|
||||||
|
- **Roadmap (N1–N15):** [`dashboard/docs/DASHBOARD_ROADMAP.md`](../dashboard/docs/DASHBOARD_ROADMAP.md)
|
||||||
|
- **Rich Features Roadmap (A–G):** [`dashboard/docs/RICH_FEATURES_ROADMAP.md`](../dashboard/docs/RICH_FEATURES_ROADMAP.md)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Routes
|
||||||
|
|
||||||
|
| Route | Method | Description |
|
||||||
|
| -------------------- | -------- | ---------------------------------------------------- |
|
||||||
|
| `/api/ollama` | GET/POST | Ollama proxy (list, load, unload, generate) |
|
||||||
|
| `/api/whisper` | GET | Whisper binary/model discovery |
|
||||||
|
| `/api/system` | GET | System info (chip, RAM, disk, brew, pressure) |
|
||||||
|
| `/api/system/memory` | GET | Memory drilldown (vm_stat breakdown + top processes) |
|
||||||
|
| `/api/system/exec` | POST | Safe shell command execution |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Components
|
||||||
|
|
||||||
|
```
|
||||||
|
dashboard/src/app/
|
||||||
|
├── (mission-control)/mission-control/
|
||||||
|
│ ├── page.tsx # Main Mission Control page
|
||||||
|
│ └── components/
|
||||||
|
│ ├── RamBudgetBar.tsx # Stacked RAM budget visualization
|
||||||
|
│ ├── MemoryDrilldown.tsx # Process-level memory breakdown
|
||||||
|
│ └── MarkdownResponse.tsx # Markdown renderer for LLM output
|
||||||
|
├── (workspace)/components/ # Chat workspace (conversations, messages)
|
||||||
|
├── api/
|
||||||
|
│ ├── ollama/route.ts
|
||||||
|
│ ├── whisper/route.ts
|
||||||
|
│ ├── system/route.ts
|
||||||
|
│ └── system/memory/route.ts
|
||||||
|
└── lib/
|
||||||
|
├── format.ts # formatBytes, estimateRam, checkMemoryFit
|
||||||
|
├── db.ts # IndexedDB CRUD (conversations, projects, tasks)
|
||||||
|
├── cron.ts # Cron expression parser
|
||||||
|
└── scheduled-tasks.ts # Built-in task templates
|
||||||
|
```
|
||||||
|
|||||||
@ -20,18 +20,40 @@ This machine is behind an AT&T Forcepoint proxy that performs SSL deep packet in
|
|||||||
### What Works Through Proxy
|
### What Works Through Proxy
|
||||||
|
|
||||||
| Tool | Status | Notes |
|
| Tool | Status | Notes |
|
||||||
| -------------------------- | ---------- | ------------------------------------- |
|
| -------------------------- | ---------- | ------------------------------------------- |
|
||||||
| `ollama pull` | ✅ Works | Ollama handles proxy natively |
|
| `ollama pull` | ✅ Works | Ollama handles proxy natively |
|
||||||
| `brew install` | ✅ Works | Homebrew handles proxy |
|
| `brew install` | ✅ Works | Homebrew handles proxy |
|
||||||
| `npm install` | ✅ Works | With `NODE_TLS_REJECT_UNAUTHORIZED=0` |
|
| `npm install` | ✅ Works | With `NODE_TLS_REJECT_UNAUTHORIZED=0` |
|
||||||
|
| `git clone` (GitHub) | ✅ Works | With `GIT_SSL_NO_VERIFY=1` |
|
||||||
|
| `pip install` (PyPI) | ✅ Works | Via corporate Artifactory mirror |
|
||||||
|
| **`hf-mirror.com`** | ✅ Works | Chinese HuggingFace mirror, **not blocked** |
|
||||||
| `curl` to Hugging Face | ❌ Blocked | Returns 19 KB HTML redirect page |
|
| `curl` to Hugging Face | ❌ Blocked | Returns 19 KB HTML redirect page |
|
||||||
| `curl -k` to Hugging Face | ❌ Blocked | Still intercepted even with `-k` |
|
| `curl -k` to Hugging Face | ❌ Blocked | Still intercepted even with `-k` |
|
||||||
| `python requests` to HF | ❌ Blocked | SSL_CERTIFICATE_VERIFY_FAILED |
|
| `python requests` to HF | ❌ Blocked | SSL_CERTIFICATE_VERIFY_FAILED |
|
||||||
| `huggingface_hub` download | ❌ Blocked | Falls back to cached (broken) files |
|
| `huggingface_hub` download | ❌ Blocked | Falls back to cached (broken) files |
|
||||||
|
|
||||||
### Workaround: Download Off-Network
|
### Workaround 1: Use hf-mirror.com (recommended)
|
||||||
|
|
||||||
For Hugging Face model downloads (e.g., Whisper GGML files):
|
`hf-mirror.com` is a Chinese mirror of HuggingFace that **is NOT blocked** by Forcepoint. Replace `huggingface.co` with `hf-mirror.com` in any download URL:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Instead of: https://huggingface.co/org/model/resolve/main/file.bin
|
||||||
|
# Use: https://hf-mirror.com/org/model/resolve/main/file.bin
|
||||||
|
|
||||||
|
# Example: download SNAC decoder (TTS)
|
||||||
|
curl -k -L -o models/snac_24khz/pytorch_model.bin \
|
||||||
|
"https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
|
||||||
|
|
||||||
|
# Example: download Whisper model
|
||||||
|
curl -k -L -o ~/whisper-models/ggml-large-v3-turbo.bin \
|
||||||
|
"https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
The TTS scripts (`setup-tts.sh`, `download-tts-models.sh`) use this mirror automatically.
|
||||||
|
|
||||||
|
### Workaround 2: Download Off-Network
|
||||||
|
|
||||||
|
If the mirror is also blocked, use a non-corporate network:
|
||||||
|
|
||||||
1. **Disconnect** from corporate VPN/Wi-Fi
|
1. **Disconnect** from corporate VPN/Wi-Fi
|
||||||
2. **Connect** to personal hotspot or home Wi-Fi
|
2. **Connect** to personal hotspot or home Wi-Fi
|
||||||
|
|||||||
230
__LOCAL_LLMs/docs/10-text-to-speech.md
Normal file
230
__LOCAL_LLMs/docs/10-text-to-speech.md
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
# 10 — Text-to-Speech (TTS) — Local Setup
|
||||||
|
|
||||||
|
> Local TTS on Apple Silicon: Orpheus TTS via Ollama + Qwen3-TTS 0.6B direct.
|
||||||
|
> Works through corporate proxy via `hf-mirror.com`.
|
||||||
|
> Last updated: 2026-02-21
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Two TTS engines for local speech generation — both run fully offline after initial setup.
|
||||||
|
|
||||||
|
| Engine | Model | Size | How It Runs | Quality | Speed |
|
||||||
|
| --------------- | --------------------------------- | ------ | ----------------------- | ------------------------------------------ | ------------------------ |
|
||||||
|
| **Orpheus TTS** | `sematre/orpheus:en` | 4 GB | Via Ollama (Metal GPU) | Great — expressive, 8 voices, emotion tags | ~11s for short sentences |
|
||||||
|
| **Qwen3-TTS** | `Qwen3-TTS-12Hz-0.6B-CustomVoice` | 1.2 GB | Direct Python (MPS/CPU) | Excellent — 10 languages, voice design | ~10-20s on MPS |
|
||||||
|
|
||||||
|
### Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Text → Ollama (Orpheus 3B) → Audio Tokens → SNAC Decoder → WAV file
|
||||||
|
Text → Qwen3-TTS 0.6B (PyTorch MPS) → WAV file
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Start (Fresh Laptop)
|
||||||
|
|
||||||
|
The **one-shot setup script** handles everything — works on any Apple Silicon Mac, including through corporate proxy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd __LOCAL_LLMs
|
||||||
|
bash setup-tts.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This installs: Python 3.12, venv, pip packages, Orpheus model (Ollama), SNAC decoder (hf-mirror.com), and optionally Qwen3-TTS 0.6B.
|
||||||
|
|
||||||
|
After setup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
.venv-qwen-tts/bin/python test_orpheus_tts.py
|
||||||
|
afplay test_orpheus_tara.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
| Component | How to Install | Notes |
|
||||||
|
| ------------------------- | ---------------------------------- | ------------------------------ |
|
||||||
|
| **macOS + Apple Silicon** | — | M1/M2/M3/M4 (MPS acceleration) |
|
||||||
|
| **Homebrew** | `/bin/bash -c "$(curl -fsSL ...)"` | Package manager |
|
||||||
|
| **Ollama** | `brew install ollama` | Local LLM server |
|
||||||
|
| **Python 3.12** | `brew install python@3.12` | TTS packages need 3.12 |
|
||||||
|
|
||||||
|
All of the above are installed automatically by `setup-tts.sh`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Manual Setup (step by step)
|
||||||
|
|
||||||
|
If you prefer to run each step yourself instead of `setup-tts.sh`:
|
||||||
|
|
||||||
|
### 1. Python Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd __LOCAL_LLMs
|
||||||
|
|
||||||
|
# Install Python 3.12
|
||||||
|
brew install python@3.12
|
||||||
|
|
||||||
|
# Create isolated venv
|
||||||
|
/opt/homebrew/bin/python3.12 -m venv .venv-qwen-tts
|
||||||
|
|
||||||
|
# Install packages
|
||||||
|
.venv-qwen-tts/bin/pip install -U snac qwen-tts
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Orpheus TTS Model (via Ollama)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ollama serve & # start Ollama if not running
|
||||||
|
ollama pull sematre/orpheus:en # 4 GB, via Ollama registry (works through proxy)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. SNAC Audio Decoder
|
||||||
|
|
||||||
|
Downloads via `hf-mirror.com` — **works through corporate proxy**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash download-tts-models.sh snac # just SNAC (~76 MB)
|
||||||
|
```
|
||||||
|
|
||||||
|
Or manually:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p models/snac_24khz
|
||||||
|
curl -k -sL -o models/snac_24khz/config.json \
|
||||||
|
"https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json"
|
||||||
|
curl -k -L --progress-bar -o models/snac_24khz/pytorch_model.bin \
|
||||||
|
"https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Qwen3-TTS 0.6B (optional)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash download-tts-models.sh qwen # tokenizer + model (~1.7 GB)
|
||||||
|
```
|
||||||
|
|
||||||
|
After download everything runs **fully offline**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Orpheus TTS (via Ollama)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Make sure Ollama is running
|
||||||
|
ollama serve &
|
||||||
|
|
||||||
|
# Run test
|
||||||
|
.venv-qwen-tts/bin/python test_orpheus_tts.py
|
||||||
|
|
||||||
|
# Play output
|
||||||
|
afplay test_orpheus_tara.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
**Voices:** `tara`, `leah`, `jess`, `leo`, `dan`, `mia`, `zac`, `zoe`
|
||||||
|
|
||||||
|
**Emotion tags:** `<laugh>`, `<chuckle>`, `<sigh>`, `<cough>`, `<sniffle>`, `<groan>`, `<yawn>`, `<gasp>`
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Example prompt format
|
||||||
|
voice = "tara"
|
||||||
|
text = "<laugh> That's hilarious! Tell me more."
|
||||||
|
prompt = f"<custom_token_3><|begin_of_text|>{voice}: {text}<|eot_id|><custom_token_4><custom_token_5><custom_token_1>"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Qwen3-TTS (direct Python)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
.venv-qwen-tts/bin/python test_qwen_tts.py
|
||||||
|
afplay test_output_english.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
|
||||||
|
- 10 languages (Chinese, English, Japanese, Korean, German, French, Russian, Portuguese, Spanish, Italian)
|
||||||
|
- Built-in speaker voices (Chelsie, Vivian, Ryan, etc.)
|
||||||
|
- Natural language emotion control: `instruct="Speak with excitement"`
|
||||||
|
- Voice cloning from a short audio sample (with Base model variant)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Inventory
|
||||||
|
|
||||||
|
```
|
||||||
|
__LOCAL_LLMs/
|
||||||
|
├── setup-tts.sh # ← START HERE — one-shot setup for fresh laptop
|
||||||
|
├── download-tts-models.sh # Download model weights (uses hf-mirror.com)
|
||||||
|
├── test_orpheus_tts.py # Orpheus TTS test (Ollama + SNAC)
|
||||||
|
├── test_qwen_tts.py # Qwen3-TTS test (direct Python)
|
||||||
|
├── .venv-qwen-tts/ # Python 3.12 venv (gitignored, created by setup)
|
||||||
|
├── models/ # Downloaded model weights (gitignored)
|
||||||
|
│ ├── snac_24khz/ # SNAC audio decoder (~76 MB)
|
||||||
|
│ ├── Qwen3-TTS-Tokenizer-12Hz/ # Qwen3-TTS tokenizer (optional)
|
||||||
|
│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/ # Qwen3-TTS model (~1.2 GB, optional)
|
||||||
|
└── *.wav # Generated audio output (gitignored)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## OSS TTS Landscape (as of Feb 2026)
|
||||||
|
|
||||||
|
### Speech-to-Text (STT)
|
||||||
|
|
||||||
|
| Model | By | Notes |
|
||||||
|
| ------------------------- | ------------------ | --------------------------------------------------- |
|
||||||
|
| **Whisper / whisper-cpp** | OpenAI / ggerganov | Gold standard, already installed, Metal-accelerated |
|
||||||
|
| **Faster Whisper** | SYSTRAN | 4× faster via CTranslate2 |
|
||||||
|
| **Distil-Whisper** | Hugging Face | 6× faster, 49% fewer params |
|
||||||
|
|
||||||
|
### Text-to-Speech (TTS)
|
||||||
|
|
||||||
|
| Model | By | Size | Notes |
|
||||||
|
| ---------------- | ------------ | --------- | ------------------------------------------------------- |
|
||||||
|
| **Qwen3-TTS** ⭐ | Alibaba | 0.6B–1.7B | Best quality, 10 languages, voice cloning, Jan 2026 |
|
||||||
|
| **Orpheus TTS** | Canopy AI | 3B | Expressive, 8 voices, emotion tags, available on Ollama |
|
||||||
|
| **Kokoro** | HF Community | 82M | Very fast, near-commercial quality, Apache 2.0 |
|
||||||
|
| **Piper** | Rhasspy | ONNX | Lightweight, runs on Raspberry Pi |
|
||||||
|
| **F5-TTS** | SWivid | — | Zero-shot voice cloning, flow matching |
|
||||||
|
| **StyleTTS 2** | Columbia U | — | Human-level quality, style diffusion |
|
||||||
|
| **OuteTTS** | Community | — | Pure LLM-based TTS, runs via llama.cpp |
|
||||||
|
| **Bark** | Suno | — | Speech + music + sound effects |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Corporate Proxy Notes
|
||||||
|
|
||||||
|
| Source | Status | Workaround |
|
||||||
|
| ------------------------------------------ | ---------- | --------------------------------------------------- |
|
||||||
|
| **Ollama registry** (`registry.ollama.ai`) | ✅ Works | Ollama pull uses its own CDN |
|
||||||
|
| **PyPI** (via `artifact.it.att.com`) | ✅ Works | Corporate Artifactory mirror |
|
||||||
|
| **GitHub releases** | ✅ Works | Direct download |
|
||||||
|
| **HuggingFace** (`huggingface.co`) | ❌ Blocked | Use `hf-mirror.com` as mirror (works through proxy) |
|
||||||
|
| **hf-mirror.com** (HF mirror) | ✅ Works | Chinese HF mirror, not blocked by Forcepoint |
|
||||||
|
|
||||||
|
Forcepoint CSO intercepts HTTPS and serves a block page for HuggingFace. No SSL workaround works for `huggingface.co`. However, **`hf-mirror.com`** (a Chinese mirror of HuggingFace) is **not blocked** and can be used to download model weights:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Download SNAC config + weights via mirror
|
||||||
|
curl -k -L -o models/snac_24khz/config.json "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json"
|
||||||
|
curl -k -L -o models/snac_24khz/pytorch_model.bin "https://hf-mirror.com/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
All other sources (Ollama, pip, GitHub) also work fine through the proxy.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
| Problem | Fix |
|
||||||
|
| --------------------------------------------- | ----------------------------------------------------------------------------- |
|
||||||
|
| `OSError: couldn't connect to huggingface.co` | Use `hf-mirror.com` or run `bash setup-tts.sh` |
|
||||||
|
| `SNAC decoder not found` | Run `bash setup-tts.sh` or `bash download-tts-models.sh snac` |
|
||||||
|
| `Model not found at models/Qwen3-TTS-*` | Run `bash setup-tts.sh` or `bash download-tts-models.sh qwen` |
|
||||||
|
| Orpheus generates no audio tokens | Ensure `ollama serve` is running and `ollama list` shows `sematre/orpheus:en` |
|
||||||
|
| MPS out of memory for Qwen3-TTS | Close other apps (Windsurf uses ~18 GB). Or use `device="cpu"` in test script |
|
||||||
|
| Slow generation on CPU | Expected for 0.6B model. MPS should be ~2-3× faster |
|
||||||
@ -1,310 +0,0 @@
|
|||||||
# Mission Control Dashboard — Bug & Improvement Review
|
|
||||||
|
|
||||||
> Systematic code review of `__LOCAL_LLMs/dashboard/` (6 source files, 1,395 lines)
|
|
||||||
> Last updated: Feb 19, 2026
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## File Inventory
|
|
||||||
|
|
||||||
| File | Lines | Purpose |
|
|
||||||
| ------------------------------------ | ----- | -------------------------------------------------------------------- |
|
|
||||||
| `src/app/page.tsx` | 1,079 | Main dashboard UI (single component) |
|
|
||||||
| `src/app/globals.css` | 91 | Design tokens, animations, base styles |
|
|
||||||
| `src/app/layout.tsx` | 20 | Root layout (metadata, dark mode) |
|
|
||||||
| `src/app/api/ollama/route.ts` | 117 | Ollama REST proxy (list, load, unload, pull, delete, show, generate) |
|
|
||||||
| `src/app/api/ollama/stream/route.ts` | 38 | Ollama streaming generate proxy (NDJSON) |
|
|
||||||
| `src/app/api/whisper/route.ts` | 66 | Whisper binary + GGML model discovery |
|
|
||||||
| `src/app/api/system/route.ts` | 162 | System info (chip, memory via vm_stat, disk, brew) |
|
|
||||||
|
|
||||||
**Stack:** Next.js 16, React 19, TailwindCSS v4, Lucide icons, TypeScript
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Bugs
|
|
||||||
|
|
||||||
- [x] **B1. Hardcoded machine specs in header** — `page.tsx:317`
|
|
||||||
Subtitle reads `Apple M4 Pro · 48 GB · {system?.platform}` — should use `system?.chip` and `formatBytes(system?.memory.total)` dynamically so it works on any machine.
|
|
||||||
|
|
||||||
- [x] **B2. Pull model blocks UI — no progress feedback** — `api/ollama/route.ts:84-92`
|
|
||||||
`handlePull` calls Ollama with `stream: false`. Large models (20+ GB) block for 30+ minutes. The Next.js API route will likely timeout. Must use `stream: true` and pipe progress events to the client. _(Combined with F1.)_
|
|
||||||
|
|
||||||
- [x] **B3. Dead code: non-streaming `generate` action** — `api/ollama/route.ts:69-82`
|
|
||||||
The `action === 'generate'` handler is unused — UI only uses `/api/ollama/stream`. Remove or keep as fallback with a comment.
|
|
||||||
|
|
||||||
- [x] **B4. Escape key closes modal during active streaming** — `page.tsx:188-197`
|
|
||||||
Global `keydown` handler calls `setPromptModel(null)` unconditionally. Backdrop click correctly checks `!promptLoading`. Escape should also respect `promptLoading` to prevent discarding an in-flight response.
|
|
||||||
|
|
||||||
- [x] **B5. Auto-refresh (15s) fires during streaming/pull** — `page.tsx:182-185`
|
|
||||||
`setInterval(fetchAll, 15000)` runs unconditionally. During streaming this causes background churn and potential UI flicker. Should pause while `promptLoading` or `pullLoading` is true.
|
|
||||||
|
|
||||||
- [x] **B6. Toast ID collision on HMR remount** — `page.tsx:156-159`
|
|
||||||
`toastId.current` resets to 0 on component remount during dev. Use `Date.now()` or `crypto.randomUUID()` for robust uniqueness.
|
|
||||||
|
|
||||||
- [x] **B7. vm_stat page size hardcoded** — `api/system/route.ts:103`
|
|
||||||
Hardcoded `16384`. Should parse from vm_stat's first line: `"(page size of NNNNN bytes)"` for portability.
|
|
||||||
|
|
||||||
- [x] **B8. Whisper models dir not configurable** — `api/whisper/route.ts:24`
|
|
||||||
Hardcoded to `~/whisper-models`. Should scan multiple known paths (`/opt/homebrew/share/whisper-cpp/models/`, `~/whisper-models`, `~/.cache/whisper/`) or accept `WHISPER_MODELS_DIR` env var.
|
|
||||||
|
|
||||||
- [x] **B9. No AbortController for streaming fetch** — `page.tsx:250-289`
|
|
||||||
Closing the prompt modal doesn't cancel the underlying fetch. The `reader.read()` loop continues in the background wasting CPU/bandwidth until the model finishes generating.
|
|
||||||
|
|
||||||
- [x] **B10. Brew shows "Loading..." when array is empty** — `page.tsx:936-940`
|
|
||||||
When `system.brewPackages` is `[]` (all uninstalled), displays "Loading..." instead of "No packages found". Needs to distinguish "still fetching" vs "fetched but empty".
|
|
||||||
|
|
||||||
- [x] **B11. Prompt text not cleared on close without send** — `page.tsx:951-957`
|
|
||||||
Backdrop click clears `promptText`, but Escape handler (B4 fix) should also clear it. Otherwise stale text persists when re-opening.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2. Code Quality
|
|
||||||
|
|
||||||
- [x] **CQ1. Monolithic 1,079-line single component** — `page.tsx`
|
|
||||||
All interfaces, utilities, sub-components, and 900+ lines of JSX in one file. Extract to:
|
|
||||||
- `components/` — StatusDot, ProgressBar, ToastContainer, PromptModal, OllamaModelsPanel, SystemPanel, WhisperPanel, BrewPanel
|
|
||||||
- `lib/types.ts` — interfaces (OllamaModel, SystemData, etc.)
|
|
||||||
- `lib/format.ts` — formatBytes, formatUptime
|
|
||||||
- `lib/hooks.ts` — useAutoRefresh, useToasts, useOllamaActions
|
|
||||||
|
|
||||||
- [x] **CQ2. Pervasive inline styles instead of CSS/Tailwind classes** — `page.tsx` (100+ occurrences)
|
|
||||||
Every `style={{ color: 'var(--text-tertiary)' }}` should be a utility class. Options: custom Tailwind theme mapping, or CSS utility classes in `globals.css` (e.g., `.text-muted`).
|
|
||||||
|
|
||||||
- [x] **CQ3. OLLAMA_URL duplicated** — `api/ollama/route.ts:3` + `api/ollama/stream/route.ts:3`
|
|
||||||
Same `process.env.OLLAMA_URL || 'http://localhost:11434'` in two files. Extract to `lib/ollama-config.ts`.
|
|
||||||
|
|
||||||
- [x] **CQ4. No React Error Boundary** — `page.tsx`
|
|
||||||
Unexpected API response shape crashes the entire dashboard. Add an `error.tsx` (Next.js App Router convention) for graceful recovery.
|
|
||||||
|
|
||||||
- [x] **CQ5. No loading skeleton / shimmer UI**
|
|
||||||
Initial load shows "..." placeholders. Skeleton cards would be more polished.
|
|
||||||
|
|
||||||
- [x] **CQ6. No TypeScript strict null checks in API responses**
|
|
||||||
API route handlers catch errors but return loosely typed JSON. Add Zod validation on the Ollama/system responses to prevent runtime surprises.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3. Features
|
|
||||||
|
|
||||||
- [x] **F1. Streaming pull with progress bar** _(fixes B2)_
|
|
||||||
Use Ollama `stream: true` for `/api/pull`. Create `/api/ollama/pull/route.ts` that pipes NDJSON progress. UI shows progress bar with `completed/total` bytes, speed, and ETA.
|
|
||||||
|
|
||||||
- [x] **F2. Model search/filter**
|
|
||||||
Search input above models list. Filter by name, family, quantization. Useful when 10+ models are installed.
|
|
||||||
|
|
||||||
- [x] **F3. Prompt history (localStorage)**
|
|
||||||
Store last 20 prompts with model name + timestamp. Dropdown in prompt modal to re-run previous prompts.
|
|
||||||
|
|
||||||
- [x] **F4. Chat mode (multi-turn conversation)**
|
|
||||||
Use Ollama `/api/chat` instead of `/api/generate`. Chat bubble layout with message history. System prompt input field.
|
|
||||||
|
|
||||||
- [x] **F5. Model comparison (side-by-side)**
|
|
||||||
Send same prompt to 2 models simultaneously. Display responses side-by-side with latency/quality comparison.
|
|
||||||
|
|
||||||
- [x] **F6. Token/s metrics after generation**
|
|
||||||
Parse `eval_count` and `eval_duration` from the final NDJSON chunk. Display tokens/second, total tokens, and latency in the response footer.
|
|
||||||
|
|
||||||
- [x] **F7. System resource sparklines (time-series)**
|
|
||||||
Ring buffer of memory/CPU snapshots (localStorage). Render mini sparkline charts in the System panel. Spot trends over time.
|
|
||||||
|
|
||||||
- [x] **F8. Ollama server logs viewer**
|
|
||||||
Read `~/.ollama/logs/` and display in a collapsible terminal-style panel. Filter by level. Auto-scroll.
|
|
||||||
|
|
||||||
- [x] **F9. Modelfile / template viewer**
|
|
||||||
The `show` action already fetches Modelfile, template, and system prompt. Display in a collapsible code block in expanded model details.
|
|
||||||
|
|
||||||
- [x] **F10. Dark/light theme toggle**
|
|
||||||
Add `:root.light` CSS variable overrides. Theme toggle with localStorage persistence. Current architecture supports this natively.
|
|
||||||
|
|
||||||
- [x] **F11. Keyboard shortcuts panel (`?` key)**
|
|
||||||
Show all shortcuts in a modal: ⌘+Enter (send), Esc (close), R (refresh), / (search models), ? (help).
|
|
||||||
|
|
||||||
- [x] **F12. Whisper transcription test**
|
|
||||||
Upload/record a short audio clip, transcribe locally via whisper-cli, display result with latency. Tests the full local STT pipeline.
|
|
||||||
|
|
||||||
- [x] **F13. Responsive mobile layout**
|
|
||||||
Better breakpoints for the 4-column stats row and 3-column main grid. Collapsible sidebar on mobile.
|
|
||||||
|
|
||||||
- [x] **F14. Model tags/labels (localStorage)**
|
|
||||||
User-defined tags (coding, fast, vision) with colored badges. Persisted in localStorage.
|
|
||||||
|
|
||||||
- [x] **F15. Extraction service integration panel**
|
|
||||||
Show extraction-service (port 4005) health status. Run test extractions against loaded Ollama models. Bridges dashboard to LysnrAI pipeline.
|
|
||||||
|
|
||||||
- [x] **F16. Auto-load preferred model**
|
|
||||||
Mark a model as "auto-load" (stored in localStorage). When Ollama is online but no models loaded, auto-load the preferred model.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4. Performance & Reliability
|
|
||||||
|
|
||||||
- [x] **P1. No request deduplication on Refresh** — `page.tsx:164-176`
|
|
||||||
Rapid clicks on Refresh fire duplicate `fetchAll()` calls. Add a `fetchingRef` guard or disable the button during fetch (partially done for `actionLoading` but not for `fetchAll`).
|
|
||||||
|
|
||||||
- [x] **P2. Static cache never expires** — `api/system/route.ts:81-90`
|
|
||||||
`staticCache` (chip, GPU, brew) lives forever in the server process. Brew package upgrades won't reflect. Add 5-minute TTL.
|
|
||||||
|
|
||||||
- [x] **P3. `du -sk ~/.ollama/models` on every refresh** — `api/system/route.ts:41`
|
|
||||||
Traverses entire models directory every 15 seconds. Cache with 60-second TTL.
|
|
||||||
|
|
||||||
- [x] **P4. No fetch timeout on Ollama calls** — `api/ollama/route.ts:5-12`
|
|
||||||
`fetchOllama` has no `AbortSignal` or timeout. If Ollama hangs, the dashboard hangs. Add 5-second timeout.
|
|
||||||
|
|
||||||
- [x] **P5. `system_profiler` slow on first load** — `api/system/route.ts:52-53`
|
|
||||||
Takes ~2-3 seconds. Cached after first call, but first dashboard load waits. Consider eager background fetch on server start or return placeholder.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 5. Security & Hardening
|
|
||||||
|
|
||||||
- [x] **S1. No input validation on model names** — `api/ollama/route.ts:50-51`
|
|
||||||
`model` from request body passed directly to Ollama. Add regex validation: `^[a-zA-Z0-9._:/-]{1,256}$`.
|
|
||||||
|
|
||||||
- [x] **S2. Shell command interpolation pattern** — `api/system/route.ts:67`
|
|
||||||
``execAsync(`brew list --versions ${pkg}`)`` — safe today (hardcoded targets) but fragile. Use `execFile('brew', ['list', '--versions', pkg])` for safety.
|
|
||||||
|
|
||||||
- [x] **S3. No CORS or auth** _(acceptable for local-only, documented)_
|
|
||||||
Any local process can call API routes. Fine for dev tool; document the assumption.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 6. Implementation Tracker
|
|
||||||
|
|
||||||
### Sprint 1 — Critical Bug Fixes _(est. 1–2 hrs)_
|
|
||||||
|
|
||||||
| # | ID | Task | Effort | Commit |
|
|
||||||
| --- | --------- | ----------------------------------------- | ------ | --------- |
|
|
||||||
| 1 | - [x] B4 | Guard Escape key during streaming | 5 min | `2da67c2` |
|
|
||||||
| 2 | - [x] B5 | Pause auto-refresh during prompt/pull | 10 min | `2da67c2` |
|
|
||||||
| 3 | - [x] B9 | Add AbortController to streaming fetch | 15 min | `2da67c2` |
|
|
||||||
| 4 | - [x] B1 | Dynamic chip/RAM in header | 5 min | `2da67c2` |
|
|
||||||
| 5 | - [x] B11 | Clear prompt text on Escape close | 5 min | `2da67c2` |
|
|
||||||
| 6 | - [x] P4 | Add timeout to Ollama fetch calls | 10 min | `2da67c2` |
|
|
||||||
| 7 | - [x] B3 | Remove dead generate action (or document) | 5 min | `2da67c2` |
|
|
||||||
| 8 | - [x] B6 | Use Date.now() for toast IDs | 2 min | `2da67c2` |
|
|
||||||
| 9 | - [x] B10 | Fix brew "Loading..." vs "empty" state | 5 min | `2da67c2` |
|
|
||||||
|
|
||||||
### Sprint 2 — Pull Progress + Metrics _(est. 2–3 hrs)_
|
|
||||||
|
|
||||||
| # | ID | Task | Effort | Commit |
|
|
||||||
| --- | ----------- | ----------------------------------- | ------ | --------- |
|
|
||||||
| 10 | - [x] B2+F1 | Streaming pull with progress bar | 60 min | `2d9475b` |
|
|
||||||
| 11 | - [x] F6 | Display tokens/s after generation | 30 min | `2d9475b` |
|
|
||||||
| 12 | - [x] B7 | Parse vm_stat page size dynamically | 10 min | `2d9475b` |
|
|
||||||
| 13 | - [x] B8 | Multi-path whisper model discovery | 15 min | `2d9475b` |
|
|
||||||
|
|
||||||
### Sprint 3 — Component Refactor _(est. 2–3 hrs)_
|
|
||||||
|
|
||||||
| # | ID | Task | Effort | Commit |
|
|
||||||
| --- | --------- | --------------------------------------- | ------ | --------- |
|
|
||||||
| 14 | - [x] CQ1 | Extract components into separate files | 90 min | `75a3cd0` |
|
|
||||||
| 15 | - [x] CQ4 | Add error.tsx Error Boundary | 15 min | `75a3cd0` |
|
|
||||||
| 16 | - [x] CQ3 | Shared ollama-config.ts | 10 min | `75a3cd0` |
|
|
||||||
| 17 | - [x] CQ2 | Consolidate inline styles → CSS classes | 45 min | `ed93a6f` |
|
|
||||||
| 18 | - [x] S1 | Add model name input validation | 10 min | `75a3cd0` |
|
|
||||||
| 19 | - [x] S2 | Replace exec → execFile for brew | 10 min | `75a3cd0` |
|
|
||||||
|
|
||||||
### Sprint 4 — UX Enhancements _(est. 3–4 hrs)_
|
|
||||||
|
|
||||||
| # | ID | Task | Effort | Commit |
|
|
||||||
| --- | --------- | ------------------------------------ | ------ | --------- |
|
|
||||||
| 20 | - [x] F3 | Prompt history (localStorage) | 45 min | `9c2f5f3` |
|
|
||||||
| 21 | - [x] F9 | Modelfile viewer in expanded details | 30 min | `9c2f5f3` |
|
|
||||||
| 22 | - [x] F4 | Chat mode (multi-turn via /api/chat) | 90 min | `ed93a6f` |
|
|
||||||
| 23 | - [x] F2 | Model search/filter | 30 min | `9c2f5f3` |
|
|
||||||
| 24 | - [x] F11 | Keyboard shortcuts panel | 20 min | `9c2f5f3` |
|
|
||||||
|
|
||||||
### Sprint 5 — Integration & Polish _(est. 2–3 hrs)_
|
|
||||||
|
|
||||||
| # | ID | Task | Effort | Commit |
|
|
||||||
| --- | ----------- | -------------------------- | ------ | --------- |
|
|
||||||
| 25 | - [x] F15 | Extraction service panel | 60 min | `8bdd5ee` |
|
|
||||||
| 26 | - [x] F12 | Whisper transcription test | 45 min | `8bdd5ee` |
|
|
||||||
| 27 | - [x] F7 | System resource sparklines | 45 min | `8bdd5ee` |
|
|
||||||
| 28 | - [x] CQ5 | Loading skeleton UI | 20 min | `8bdd5ee` |
|
|
||||||
| 29 | - [x] P1-P3 | Request dedup + cache TTLs | 30 min | `b1fda3a` |
|
|
||||||
| 30 | - [x] F16 | Auto-load preferred model | 20 min | `ed93a6f` |
|
|
||||||
|
|
||||||
### Deferred (nice-to-have)
|
|
||||||
|
|
||||||
| ID | Task | Notes |
|
|
||||||
| --------- | ------------------------------- | --------- |
|
|
||||||
| - [x] F5 | Model comparison (side-by-side) | `8bdd5ee` |
|
|
||||||
| - [x] F10 | Dark/light theme toggle | `ed93a6f` |
|
|
||||||
| - [x] F13 | Responsive mobile layout | `8bdd5ee` |
|
|
||||||
| - [x] F14 | Model tags/labels | `ed93a6f` |
|
|
||||||
| - [x] CQ6 | Zod validation on API responses | `ed93a6f` |
|
|
||||||
| - [x] F8 | Ollama server logs viewer | `8bdd5ee` |
|
|
||||||
| - [x] S3 | CORS / auth (documented) | `8bdd5ee` |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 7. Commit Log
|
|
||||||
|
|
||||||
_Commits will be added here as work progresses._
|
|
||||||
|
|
||||||
| # | Date | Commit | Sprint | Items Completed |
|
|
||||||
| --- | ------ | --------- | -------- | ------------------------------------ |
|
|
||||||
| 1 | Feb 19 | `2da67c2` | Sprint 1 | B1, B3, B4, B5, B6, B9, B10, B11, P4 |
|
|
||||||
| 2 | Feb 19 | `2d9475b` | Sprint 2 | B2, B7, B8, F1, F6 |
|
|
||||||
| 3 | Feb 19 | `75a3cd0` | Sprint 3 | CQ1, CQ3, CQ4, S1, S2 |
|
|
||||||
| 4 | Feb 19 | `9c2f5f3` | Sprint 4 | F2, F3, F9, F11 |
|
|
||||||
| 5 | Feb 19 | `b1fda3a` | Sprint 5 | P1, P2, P3 |
|
|
||||||
| 6 | Feb 19 | `ed93a6f` | Sprint 6 | CQ2, CQ6, P5, F4, F10, F14, F16 |
|
|
||||||
| 7 | Feb 19 | `8bdd5ee` | Sprint 7 | F5, F7, F8, F12, F13, F15, CQ5, S3 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
> **41 items total:** 11 bugs, 6 code quality, 16 features, 5 performance, 3 security
|
|
||||||
> **All 41 items completed** across 7 sprints (9 code commits + doc updates)
|
|
||||||
> **Actual total effort:** ~8 hours across 7 sprints
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 8. Next Wave — Model Intelligence & Pre-Load Metrics
|
|
||||||
|
|
||||||
> Proposed improvements focused on helping users make informed decisions **before** loading a model.
|
|
||||||
|
|
||||||
### Tier A — Pre-Load Decision Metrics _(est. 45 min)_
|
|
||||||
|
|
||||||
| ID | Feature | Description |
|
|
||||||
| --- | ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
||||||
| N1 | **Estimated RAM per model** | Approximate from disk size: Q4_K_M ≈ 1.2×disk in RAM. Show on every model card (e.g., `~22 GB RAM`), not just running models. |
|
|
||||||
| N2 | **"Will it fit?" indicator** | Compare estimated RAM vs `system.memory.free + cached`. Color-code: 🟢 Fits, 🟡 Tight (80–100%), 🔴 Won't fit. Show on Load button or as badge. |
|
|
||||||
| N3 | **Aggregate loaded model RAM** | Sum VRAM of all running models. Display at top of models panel: "3 models loaded · 28.5 GB VRAM". |
|
|
||||||
|
|
||||||
### Tier B — Rich Model Metadata _(est. 60 min)_
|
|
||||||
|
|
||||||
| ID | Feature | Description |
|
|
||||||
| --- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------- | ---------------- | ------------------------------------------ |
|
|
||||||
| N4 | **RAM budget bar** | Horizontal stacked bar: `[OS+Apps | Model A (loaded) | Model B (loaded) | Free]`. Instant visual of memory headroom. |
|
|
||||||
| N5 | **Context window size** | Fetch `context_length` from Ollama `/api/show` → `model_info`. Display on card (e.g., `128k ctx`). Critical for knowing max prompt length. |
|
|
||||||
|
|
||||||
### Tier C — Model Intelligence Badges _(est. 45 min)_
|
|
||||||
|
|
||||||
| ID | Feature | Description |
|
|
||||||
| --- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
|
||||||
| N6 | **`<think>` warning badge** | If model is DeepSeek R1 family, show ⚠️ badge: "Emits `<think>` traces — strip before JSON.parse". Prevents silent JSON failures. |
|
|
||||||
| N7 | **Vision model indicator** | If model is multimodal (llava, qwen2.5vl), show 👁 badge. These need image input — text-only prompts are suboptimal. |
|
|
||||||
| N8 | **Architecture badge** | Show model arch (llama, qwen2, phi3, deepseek2) as subtle pill on the card. Currently buried in expanded details. |
|
|
||||||
| N9 | **Sort/order models** | Dropdown to sort by: name, size, parameters, running status, last modified. Currently uses Ollama's default order. |
|
|
||||||
| N10 | **Ollama version display** | Call `/api/version`. Show in Ollama status card. Useful for debugging model compatibility. |
|
|
||||||
|
|
||||||
### Tier D — Runtime Metrics & UX _(est. 30 min)_
|
|
||||||
|
|
||||||
| ID | Feature | Description |
|
|
||||||
| --- | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
||||||
| N11 | **Last known tok/s per model** | Persist `StreamMetrics.tokensPerSec` in localStorage keyed by model. Show on card (e.g., `~45 tok/s`). Compare speeds without re-benchmarking. |
|
|
||||||
| N12 | **Auto-unload countdown** | Replace static `Expires: 3:45 PM` with live countdown: `Unloads in 4m 32s`. More actionable. |
|
|
||||||
| N13 | **Session stats per model** | Track prompts sent + tokens generated per model in session. Show in expanded details. |
|
|
||||||
| N14 | **Delete confirmation + reclaim** | Show "Delete qwen2.5-coder:32b? Reclaim 18.5 GB disk." before deleting. Currently no confirmation. |
|
|
||||||
| N15 | **Simultaneous load suggestions** | Based on available RAM, suggest which models can be co-loaded. E.g., "Can co-load llama3.1:8b + qwen2.5-coder:32b (28 GB, 20 GB free)". |
|
|
||||||
|
|
||||||
### Implementation Plan
|
|
||||||
|
|
||||||
| Sprint | Items | Focus | Effort |
|
|
||||||
| ------ | ----------------------- | ------------------------ | ------- |
|
|
||||||
| 8 | N1, N2, N3 | Pre-load RAM estimates | ~45 min |
|
|
||||||
| 9 | N4, N5 | RAM bar + context window | ~60 min |
|
|
||||||
| 10 | N6, N7, N8, N9, N10 | Badges + sort + version | ~45 min |
|
|
||||||
| 11 | N11, N12, N13, N14, N15 | Runtime metrics + UX | ~30 min |
|
|
||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
> Complete guide for the local AI inference stack on the ByteLyst development machine.
|
> Complete guide for the local AI inference stack on the ByteLyst development machine.
|
||||||
> Hardware: **Apple M4 Pro · 48 GB LPDDR5 · macOS Tahoe**
|
> Hardware: **Apple M4 Pro · 48 GB LPDDR5 · macOS Tahoe**
|
||||||
> Last updated: 2026-02-19
|
> Last updated: 2026-02-21
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -16,8 +16,11 @@ ollama serve # or: brew services start ollama
|
|||||||
ollama run qwen2.5-coder:32b # best coding model for this hardware
|
ollama run qwen2.5-coder:32b # best coding model for this hardware
|
||||||
|
|
||||||
# 3. Launch Mission Control dashboard
|
# 3. Launch Mission Control dashboard
|
||||||
cd __LOCAL_LLMs/dashboard && npm run dev -- -p 3100
|
cd __LOCAL_LLMs/dashboard && npm run dev
|
||||||
# Open http://localhost:3100
|
# Open http://localhost:3000
|
||||||
|
|
||||||
|
# 4. (Optional) Set up TTS
|
||||||
|
cd __LOCAL_LLMs && bash setup-tts.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@ -35,6 +38,7 @@ cd __LOCAL_LLMs/dashboard && npm run dev -- -p 3100
|
|||||||
| 07 | [Model Recommendations](07-model-recommendations.md) | Tiered model guide by use case, size, and quality for M4 Pro 48GB |
|
| 07 | [Model Recommendations](07-model-recommendations.md) | Tiered model guide by use case, size, and quality for M4 Pro 48GB |
|
||||||
| 08 | [Troubleshooting & Corporate Proxy](08-troubleshooting.md) | Common issues, Forcepoint proxy workarounds, MLX warnings |
|
| 08 | [Troubleshooting & Corporate Proxy](08-troubleshooting.md) | Common issues, Forcepoint proxy workarounds, MLX warnings |
|
||||||
| 09 | [Environment Variables](09-environment-variables.md) | All config vars for Ollama, Whisper, dashboard, evals |
|
| 09 | [Environment Variables](09-environment-variables.md) | All config vars for Ollama, Whisper, dashboard, evals |
|
||||||
|
| 10 | [Text-to-Speech](10-text-to-speech.md) | Orpheus TTS via Ollama, Qwen3-TTS 0.6B, setup, corporate proxy |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -53,28 +57,42 @@ __LOCAL_LLMs/
|
|||||||
│ ├── 06-extraction-service-evals.md
|
│ ├── 06-extraction-service-evals.md
|
||||||
│ ├── 07-model-recommendations.md
|
│ ├── 07-model-recommendations.md
|
||||||
│ ├── 08-troubleshooting.md
|
│ ├── 08-troubleshooting.md
|
||||||
│ └── 09-environment-variables.md
|
│ ├── 09-environment-variables.md
|
||||||
├── dashboard/ ← Next.js Mission Control app (port 3100)
|
│ └── 10-text-to-speech.md
|
||||||
│ ├── src/app/page.tsx ← main dashboard UI
|
├── dashboard/ ← Next.js Mission Control app (port 3000)
|
||||||
|
│ ├── src/app/(mission-control)/ ← Mission Control page + memory drilldown
|
||||||
│ ├── src/app/api/ollama/route.ts ← Ollama API proxy (list, load, unload, generate)
|
│ ├── src/app/api/ollama/route.ts ← Ollama API proxy (list, load, unload, generate)
|
||||||
│ ├── src/app/api/whisper/route.ts ← Whisper binary/model discovery
|
│ ├── src/app/api/whisper/route.ts ← Whisper binary/model discovery
|
||||||
│ └── src/app/api/system/route.ts ← System info (chip, RAM, disk, brew)
|
│ ├── src/app/api/system/route.ts ← System info (chip, RAM, disk, brew)
|
||||||
|
│ └── src/app/api/system/memory/route.ts ← Memory drilldown (vm_stat + top processes)
|
||||||
|
├── setup-tts.sh ← One-shot TTS setup for fresh laptop
|
||||||
|
├── download-tts-models.sh ← Download model weights (uses hf-mirror.com)
|
||||||
|
├── test_orpheus_tts.py ← Orpheus TTS test (Ollama + SNAC decoder)
|
||||||
|
├── test_qwen_tts.py ← Qwen3-TTS 0.6B test (direct Python, MPS/CPU)
|
||||||
|
├── .venv-qwen-tts/ ← Python 3.12 venv for TTS (gitignored)
|
||||||
|
├── models/ ← Downloaded TTS model weights (gitignored)
|
||||||
└── LOCAL_LLMs_setup_mac_m4_48gb.md ← original doc (preserved, see docs/ for latest)
|
└── LOCAL_LLMs_setup_mac_m4_48gb.md ← original doc (preserved, see docs/ for latest)
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Current Installation Status (2026-02-19)
|
## Current Installation Status (2026-02-21)
|
||||||
|
|
||||||
| Component | Version | Status | Disk Usage |
|
| Component | Version | Status | Disk Usage |
|
||||||
| ----------------------------------- | ---------- | ----------------------------- | ---------- |
|
| ----------------------------------- | ---------- | ------------------------------------------ | ---------- |
|
||||||
| Ollama | 0.16.2 | ✅ Installed via brew | — |
|
| Ollama | 0.16.2 | ✅ Installed via brew | — |
|
||||||
| qwen2.5-coder:32b | — | ✅ Downloaded | 19 GB |
|
| qwen2.5-coder:32b | — | ✅ Downloaded | 19 GB |
|
||||||
|
| qwen2.5-coder:7b | — | ✅ Downloaded | 4.7 GB |
|
||||||
|
| deepseek-r1:32b | — | ✅ Downloaded | 19 GB |
|
||||||
| llama3.1:8b | — | ✅ Downloaded | 4.9 GB |
|
| llama3.1:8b | — | ✅ Downloaded | 4.9 GB |
|
||||||
|
| sematre/orpheus:en (TTS) | — | ✅ Downloaded via Ollama | 4 GB |
|
||||||
| whisper-cpp | 1.8.3 | ✅ Installed via brew | 9.6 MB |
|
| whisper-cpp | 1.8.3 | ✅ Installed via brew | 9.6 MB |
|
||||||
| whisper model (ggml-large-v3-turbo) | — | ❌ Blocked by corporate proxy | — |
|
| whisper model (ggml-large-v3-turbo) | — | ✅ Downloaded via hf-mirror.com | 1.5 GB |
|
||||||
| ffmpeg | 8.0.1 | ✅ Installed via brew | 53.3 MB |
|
| ffmpeg | 8.0.1 | ✅ Installed via brew | 53.3 MB |
|
||||||
| Mission Control Dashboard | Next.js 16 | ✅ Built, runs on :3100 | — |
|
| Python 3.12 (TTS venv) | 3.12.12 | ✅ Installed via brew + venv created | ~2 GB |
|
||||||
|
| SNAC decoder (TTS) | — | ✅ Downloaded via hf-mirror.com | 76 MB |
|
||||||
|
| Qwen3-TTS 0.6B | — | ✅ Downloaded via hf-mirror.com | 1.7 GB |
|
||||||
|
| Mission Control Dashboard | Next.js 16 | ✅ Built, runs on :3000 (memory drilldown) | — |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
174
__LOCAL_LLMs/download-tts-models.sh
Executable file
174
__LOCAL_LLMs/download-tts-models.sh
Executable file
@ -0,0 +1,174 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# ============================================================
|
||||||
|
# Download TTS Model Weights
|
||||||
|
#
|
||||||
|
# Downloads SNAC decoder + Qwen3-TTS from HuggingFace.
|
||||||
|
# Uses hf-mirror.com which works through corporate proxy.
|
||||||
|
# Falls back to huggingface.co if mirror is unreachable.
|
||||||
|
#
|
||||||
|
# No Python venv required — downloads use curl; the mirror probe uses the system python3 and size reporting uses bc.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# bash download-tts-models.sh # download all
|
||||||
|
# bash download-tts-models.sh snac # SNAC decoder only
|
||||||
|
# bash download-tts-models.sh qwen # Qwen3-TTS only
|
||||||
|
# ============================================================
|
||||||
|
set -e
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
MODELS_DIR="$SCRIPT_DIR/models"
|
||||||
|
|
||||||
|
GREEN='\033[0;32m'
|
||||||
|
RED='\033[0;31m'
|
||||||
|
NC='\033[0m'
|
||||||
|
# Print a success line: a green check mark followed by the message in $1.
# Uses `echo -e`, so backslash escapes inside the message are interpreted too.
ok() { echo -e "${GREEN}✓${NC} $1"; }
|
||||||
|
# Print a failure line (red cross followed by the message in $1) and terminate
# the whole script with exit status 1.
fail() { echo -e "${RED}✗${NC} $1"; exit 1; }
|
||||||
|
|
||||||
|
echo "=== TTS Model Downloader ==="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ── Pick HuggingFace source ─────────────────────────────────
|
||||||
|
# Try hf-mirror.com first (works through corporate proxy)
|
||||||
|
# Fall back to huggingface.co (requires non-corporate network)
|
||||||
|
# HF_BASE is the base URL every download below is resolved against.
# Both probes require the response body to parse as JSON, so an HTML block
# page served by an intercepting proxy is never mistaken for a reachable host.
HF_BASE=""
echo "Testing hf-mirror.com..."
if curl -k -s --max-time 5 "https://hf-mirror.com/hubertsiuzdak/snac_24khz/raw/main/config.json" | python3 -c "import sys,json; json.load(sys.stdin)" &>/dev/null; then
    HF_BASE="https://hf-mirror.com"
    ok "Using hf-mirror.com (works through corporate proxy)"
else
    echo "Mirror unavailable. Testing huggingface.co..."
    # --fail turns HTTP error statuses into a non-zero curl exit; without it
    # curl exits 0 for any HTTP response, so a blocked host looked reachable.
    # The JSON check additionally rejects a block page served with status 200.
    if curl -s --fail --max-time 5 "https://huggingface.co/api/models/hubertsiuzdak/snac_24khz" 2>/dev/null | python3 -c "import sys,json; json.load(sys.stdin)" &>/dev/null; then
        HF_BASE="https://huggingface.co"
        ok "Using huggingface.co directly"
    else
        fail "Cannot reach hf-mirror.com or huggingface.co. If on corporate network, try from home WiFi."
    fi
fi
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
mkdir -p "$MODELS_DIR"
|
||||||
|
|
||||||
|
# ── Helper: download with validation ────────────────────────
|
||||||
|
# Download one file and abort the script if the transfer fails or the payload
# looks like an HTML page instead of the requested file.
#
# Arguments:
#   $1  URL   source URL
#   $2  DEST  destination path (overwritten if it exists)
#   $3  DESC  human-readable name used in progress and error messages
#
# On any failure the partial/invalid DEST is removed and `fail` exits the script.
#
# NOTE(review): -k disables TLS certificate verification, so the payload is not
# authenticated. It is kept because it appears deliberate for the intercepting
# proxy described in the docs — consider verifying checksums after download.
download_file() {
    local URL="$1"
    local DEST="$2"
    local DESC="$3"

    echo " Downloading $DESC..."
    # --fail: treat HTTP errors (403, 404, ...) as failures instead of saving
    # the server's error body as if it were the requested file.
    if ! curl -k -L --fail --progress-bar -o "$DEST" "$URL"; then
        rm -f "$DEST"
        fail "Download of $DESC failed ($URL)."
    fi

    # Verify not an HTML block page. The head output is piped straight into grep
    # (no command substitution) so binary payloads with NUL bytes are handled
    # cleanly; -E keeps the alternation portable across GNU and BSD grep.
    if head -c 50 "$DEST" 2>/dev/null | LC_ALL=C grep -Eqi '<!DOCTYPE|<html'; then
        rm -f "$DEST"
        fail "Downloaded $DESC is HTML (proxy block page). Try from non-corporate network."
    fi
}
|
||||||
|
|
||||||
|
# ── 1. SNAC 24kHz decoder ───────────────────────────────────
|
||||||
|
# Fetch the SNAC 24kHz decoder (config.json + pytorch_model.bin) into
# $MODELS_DIR/snac_24khz.
#
# The download is skipped only when the weights file is larger than 1 MB AND a
# non-empty config.json is present; otherwise both files are fetched again.
# Relies on HF_BASE, MODELS_DIR, download_file, ok and fail from this script.
download_snac() {
    echo "=== [SNAC] 24kHz Audio Decoder (~76 MB) ==="
    local DIR="$MODELS_DIR/snac_24khz"
    local SIZE=0   # local so the size probe does not leak into global scope
    mkdir -p "$DIR"

    if [ -f "$DIR/pytorch_model.bin" ] && [ -s "$DIR/config.json" ]; then
        # BSD stat (macOS) first, GNU stat second, 0 when neither works.
        SIZE=$(stat -f%z "$DIR/pytorch_model.bin" 2>/dev/null || stat -c%s "$DIR/pytorch_model.bin" 2>/dev/null || echo 0)
        if [ "$SIZE" -gt 1000000 ]; then
            ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
            echo ""
            return 0
        fi
    fi

    download_file "$HF_BASE/hubertsiuzdak/snac_24khz/raw/main/config.json" \
        "$DIR/config.json" "config.json"

    download_file "$HF_BASE/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin" \
        "$DIR/pytorch_model.bin" "pytorch_model.bin (~76 MB)"

    ok "SNAC decoder downloaded"
    echo ""
}
|
||||||
|
|
||||||
|
# ── 2. Qwen3-TTS Tokenizer ──────────────────────────────────
|
||||||
|
# Fetch the Qwen3-TTS tokenizer (three JSON config files + model.safetensors)
# into $MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz.
#
# The download is skipped only when model.safetensors is larger than 100 MB AND
# a non-empty config.json is present; otherwise every file is fetched again.
# Relies on HF_BASE, MODELS_DIR, download_file, ok and fail from this script.
download_qwen_tokenizer() {
    echo "=== [Qwen3-TTS] Tokenizer (~650 MB) ==="
    local DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
    local SIZE=0   # local so the size probe does not leak into global scope
    local f        # loop variable, kept out of global scope as well
    mkdir -p "$DIR"

    if [ -f "$DIR/model.safetensors" ] && [ -s "$DIR/config.json" ]; then
        # BSD stat (macOS) first, GNU stat second, 0 when neither works.
        SIZE=$(stat -f%z "$DIR/model.safetensors" 2>/dev/null || stat -c%s "$DIR/model.safetensors" 2>/dev/null || echo 0)
        if [ "$SIZE" -gt 100000000 ]; then
            ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
            echo ""
            return 0
        fi
    fi

    for f in config.json configuration.json preprocessor_config.json; do
        download_file "$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f" \
            "$DIR/$f" "$f"
    done

    download_file "$HF_BASE/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors" \
        "$DIR/model.safetensors" "model.safetensors (~650 MB)"

    ok "Qwen3-TTS Tokenizer downloaded"
    echo ""
}
|
||||||
|
|
||||||
|
# ── 3. Qwen3-TTS 0.6B model ─────────────────────────────────
|
||||||
|
# Fetch the Qwen3-TTS 0.6B CustomVoice model (two JSON config files +
# model.safetensors) into $MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice.
#
# The download is skipped only when model.safetensors is larger than 100 MB AND
# a non-empty config.json is present; otherwise every file is fetched again.
# Relies on HF_BASE, MODELS_DIR, download_file, ok and fail from this script.
download_qwen_model() {
    echo "=== [Qwen3-TTS] 0.6B CustomVoice (~1.2 GB) ==="
    local DIR="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"
    local SIZE=0   # local so the size probe does not leak into global scope
    local f        # loop variable, kept out of global scope as well
    mkdir -p "$DIR"

    if [ -f "$DIR/model.safetensors" ] && [ -s "$DIR/config.json" ]; then
        # BSD stat (macOS) first, GNU stat second, 0 when neither works.
        SIZE=$(stat -f%z "$DIR/model.safetensors" 2>/dev/null || stat -c%s "$DIR/model.safetensors" 2>/dev/null || echo 0)
        if [ "$SIZE" -gt 100000000 ]; then
            ok "Already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
            echo ""
            return 0
        fi
    fi

    for f in config.json generation_config.json; do
        download_file "$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f" \
            "$DIR/$f" "$f"
    done

    download_file "$HF_BASE/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors" \
        "$DIR/model.safetensors" "model.safetensors (~1.2 GB)"

    ok "Qwen3-TTS 0.6B downloaded"
    echo ""
}
|
||||||
|
|
||||||
|
# ── Run downloads ────────────────────────────────────────────
|
||||||
|
# Map the first CLI argument (default: "all") to the download steps to run,
# then run them in order. Any other argument prints usage and exits 1.
case "${1:-all}" in
    snac) DOWNLOAD_STEPS="download_snac" ;;
    qwen) DOWNLOAD_STEPS="download_qwen_tokenizer download_qwen_model" ;;
    all)  DOWNLOAD_STEPS="download_snac download_qwen_tokenizer download_qwen_model" ;;
    *)
        echo "Usage: bash download-tts-models.sh [snac|qwen|all]"
        exit 1
        ;;
esac

for DOWNLOAD_STEP in $DOWNLOAD_STEPS; do
    "$DOWNLOAD_STEP"
done

# ── Summary ──────────────────────────────────────────────────
# Report per-entry disk usage of the models directory, then the test commands.
echo "=== Downloads complete ==="
echo ""
echo "Disk usage:"
du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
echo ""
echo "Test commands:"
echo " .venv-qwen-tts/bin/python test_orpheus_tts.py # Orpheus via Ollama"
echo " .venv-qwen-tts/bin/python test_qwen_tts.py # Qwen3-TTS direct"
|
||||||
256
__LOCAL_LLMs/setup-tts.sh
Executable file
256
__LOCAL_LLMs/setup-tts.sh
Executable file
@ -0,0 +1,256 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# ============================================================
|
||||||
|
# TTS Setup — One-Shot Script for Fresh Laptop
|
||||||
|
#
|
||||||
|
# Sets up Orpheus TTS (via Ollama) and Qwen3-TTS (direct Python)
|
||||||
|
# on Apple Silicon Macs. Works through corporate proxy.
|
||||||
|
#
|
||||||
|
# What this does:
|
||||||
|
# 1. Installs Python 3.12 via Homebrew (if missing)
|
||||||
|
# 2. Creates Python venv with TTS packages
|
||||||
|
# 3. Pulls Orpheus TTS model via Ollama
|
||||||
|
# 4. Downloads SNAC audio decoder via hf-mirror.com
|
||||||
|
# 5. (Optional) Downloads Qwen3-TTS 0.6B via hf-mirror.com
|
||||||
|
#
|
||||||
|
# Prerequisites:
|
||||||
|
# - macOS with Apple Silicon (M1/M2/M3/M4)
|
||||||
|
# - Homebrew installed
|
||||||
|
# - Ollama installed (brew install ollama)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# bash setup-tts.sh
|
||||||
|
#
|
||||||
|
# After setup, test with:
|
||||||
|
# .venv-qwen-tts/bin/python test_orpheus_tts.py
|
||||||
|
# afplay test_orpheus_tara.wav
|
||||||
|
# ============================================================
|
||||||
|
set -e
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
VENV="$SCRIPT_DIR/.venv-qwen-tts"
|
||||||
|
MODELS_DIR="$SCRIPT_DIR/models"
|
||||||
|
|
||||||
|
# HuggingFace mirror that works through corporate proxy
|
||||||
|
HF_MIRROR="https://hf-mirror.com"
|
||||||
|
|
||||||
|
RED='\033[0;31m'
|
||||||
|
GREEN='\033[0;32m'
|
||||||
|
YELLOW='\033[1;33m'
|
||||||
|
NC='\033[0m'
|
||||||
|
|
||||||
|
# Status helpers. Each prints one coloured line; `%b` expands the backslash
# escapes stored in the colour variables the same way `echo -e` does.

# Success marker.
ok() {
    printf '%b\n' "${GREEN}✓${NC} $1"
}

# Non-fatal warning.
warn() {
    printf '%b\n' "${YELLOW}⚠${NC} $1"
}

# Fatal error: prints the message, then terminates the script with status 1.
fail() {
    printf '%b\n' "${RED}✗${NC} $1"
    exit 1
}

# Section heading, preceded by an empty line.
step() {
    printf '%b\n' "\n${GREEN}=== $1 ===${NC}"
}
|
||||||
|
|
||||||
|
echo "╔══════════════════════════════════════════════╗"
|
||||||
|
echo "║ TTS Setup — Local Speech Generation ║"
|
||||||
|
echo "║ Orpheus TTS (Ollama) + Qwen3-TTS (Python) ║"
|
||||||
|
echo "╚══════════════════════════════════════════════╝"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ── 0. Check prerequisites ──────────────────────────────────
|
||||||
|
step "Checking prerequisites"

# Homebrew is needed below to install Ollama and Python when they are missing.
if ! command -v brew &>/dev/null; then
    fail "Homebrew not found. Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
fi
ok "Homebrew"

# Ollama: install it through Homebrew if the binary is not on PATH.
if ! command -v ollama &>/dev/null; then
    warn "Ollama not found. Installing..."
    brew install ollama
fi
ok "Ollama installed"

# Start the Ollama server when its HTTP API does not answer, then poll the API
# (up to ~10 s) instead of sleeping a fixed interval: a cold start can take
# longer than a few seconds, and a fixed wait would report a false failure.
if ! curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
    warn "Ollama not running. Starting..."
    ollama serve &>/dev/null &
    OLLAMA_READY=0
    for _ in 1 2 3 4 5 6 7 8 9 10; do
        sleep 1
        if curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
            OLLAMA_READY=1
            break
        fi
    done
    if [ "$OLLAMA_READY" -ne 1 ]; then
        fail "Could not start Ollama. Try manually: ollama serve"
    fi
fi
ok "Ollama running on port 11434"

# Apple Silicon check: other architectures only get a warning, not an abort.
ARCH=$(uname -m)
if [ "$ARCH" != "arm64" ]; then
    warn "Not Apple Silicon ($ARCH). MPS acceleration won't be available."
fi
|
||||||
|
|
||||||
|
# ── 1. Install Python 3.12 ──────────────────────────────────
|
||||||
|
step "Python 3.12"

# Look for an existing interpreter: PATH first, then the Homebrew locations
# for Apple Silicon (/opt/homebrew) and Intel (/usr/local).
PYTHON_CMD=""
for cmd in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do
    if command -v "$cmd" &>/dev/null; then
        PYTHON_CMD="$cmd"
        break
    fi
done

if [ -z "$PYTHON_CMD" ]; then
    warn "Python 3.12 not found. Installing via Homebrew..."
    brew install python@3.12
    # Ask Homebrew for its prefix rather than assuming /opt/homebrew, which is
    # only correct on Apple Silicon; on Intel Macs the prefix is /usr/local.
    PYTHON_CMD="$(brew --prefix)/bin/python3.12"
    if [ ! -x "$PYTHON_CMD" ]; then
        fail "python3.12 not found at $PYTHON_CMD after installation. Check: brew info python@3.12"
    fi
fi

PYTHON_VER=$("$PYTHON_CMD" --version 2>&1)
ok "$PYTHON_VER at $PYTHON_CMD"
|
||||||
|
|
||||||
|
# ── 2. Create venv ──────────────────────────────────────────
|
||||||
|
step "Python virtual environment"

# Build the venv only when its interpreter is absent; otherwise reuse it.
if [ ! -f "$VENV/bin/python" ]; then
    echo "Creating venv..."
    "$PYTHON_CMD" -m venv "$VENV"
    ok "Venv created at $VENV"
else
    ok "Venv exists at $VENV"
fi
|
||||||
|
|
||||||
|
# ── 3. Install Python packages ──────────────────────────────
|
||||||
|
step "Python packages"

# Consider the environment complete only when BOTH packages installed below
# are importable. Probing `snac` alone would report success after a run in
# which `snac` installed but `qwen-tts` failed, and the install would never be
# retried. find_spec() locates the modules without importing them, so the
# probe stays fast.
if "$VENV/bin/python" -c "import importlib.util as u, sys; sys.exit(0 if all(u.find_spec(m) is not None for m in ('snac', 'qwen_tts')) else 1)" &>/dev/null; then
    ok "Packages already installed (snac, torch, etc.)"
else
    echo "Installing packages (this may take a few minutes)..."
    "$VENV/bin/pip" install -U pip --quiet
    "$VENV/bin/pip" install -U snac qwen-tts --quiet
    ok "Packages installed"
fi
|
||||||
|
|
||||||
|
# ── 4. Pull Orpheus TTS model ───────────────────────────────
|
||||||
|
step "Orpheus TTS model (Ollama)"

# Pull the model only when `ollama list` shows no entry matching "orpheus".
if ! ollama list 2>/dev/null | grep -q "orpheus"; then
    echo "Pulling sematre/orpheus:en (4 GB)..."
    NO_PROXY="ollama.com,registry.ollama.ai" ollama pull sematre/orpheus:en
    ok "Orpheus TTS downloaded"
else
    ok "Orpheus TTS already downloaded"
fi
|
||||||
|
|
||||||
|
# ── 5. Download SNAC decoder ────────────────────────────────
|
||||||
|
step "SNAC 24kHz audio decoder (~76 MB)"

SNAC_DIR="$MODELS_DIR/snac_24khz"
SNAC_CONFIG="$SNAC_DIR/config.json"
SNAC_WEIGHTS="$SNAC_DIR/pytorch_model.bin"

mkdir -p "$SNAC_DIR"

# Keep an existing weights file only if it is plausibly complete (> 1 MB).
if [ -f "$SNAC_WEIGHTS" ]; then
    # BSD stat (macOS) first, GNU stat second. The final `echo 0` keeps SIZE
    # numeric: without it a double failure aborts the script under `set -e`
    # (or leaves SIZE empty and breaks the -gt comparison).
    SIZE=$(stat -f%z "$SNAC_WEIGHTS" 2>/dev/null || stat -c%s "$SNAC_WEIGHTS" 2>/dev/null || echo 0)
    if [ "$SIZE" -gt 1000000 ]; then
        ok "SNAC decoder already downloaded ($(echo "scale=1; $SIZE/1048576" | bc) MB)"
    else
        warn "SNAC file looks corrupted (${SIZE} bytes). Re-downloading..."
        rm -f "$SNAC_WEIGHTS"
    fi
fi

if [ ! -f "$SNAC_WEIGHTS" ]; then
    echo "Downloading config.json..."
    # NOTE(security): -k disables TLS certificate verification. It is kept from
    # the original script (TLS-intercepting corporate proxy); drop it when the
    # proxy CA is trusted. -f makes curl fail on HTTP errors instead of saving
    # the error page as if it were the requested file.
    if ! curl -k -fsSL -o "$SNAC_CONFIG" \
        "$HF_MIRROR/hubertsiuzdak/snac_24khz/raw/main/config.json"; then
        rm -f "$SNAC_CONFIG"
        fail "Could not download config.json from $HF_MIRROR. The mirror may be blocked. Try from home network."
    fi

    # Verify config is JSON (not an HTML block page). The path is passed as an
    # argument rather than spliced into the Python source, so quotes in the
    # path cannot break the check; the venv interpreter created earlier in this
    # script is used instead of whatever `python3` is on PATH.
    if ! "$VENV/bin/python" -c 'import json, sys; json.load(open(sys.argv[1]))' "$SNAC_CONFIG" &>/dev/null; then
        fail "Downloaded config.json is not valid JSON. The mirror may be blocked. Try from home network."
    fi
    ok "config.json downloaded"

    echo "Downloading pytorch_model.bin (~76 MB)..."
    if ! curl -k -fL --progress-bar -o "$SNAC_WEIGHTS" \
        "$HF_MIRROR/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"; then
        # Do not leave a truncated file behind for the next run.
        rm -f "$SNAC_WEIGHTS"
        fail "Could not download pytorch_model.bin from $HF_MIRROR. Try from home network."
    fi

    # Verify it's a real model file (zip/pytorch format), not HTML
    FILE_TYPE=$(file -b "$SNAC_WEIGHTS" | head -c 20)
    if echo "$FILE_TYPE" | grep -qi "html"; then
        rm -f "$SNAC_WEIGHTS"
        fail "Downloaded model is HTML (proxy block page). Try from home network."
    fi
    ok "SNAC decoder downloaded"
fi
|
||||||
|
|
||||||
|
# Verify SNAC loads in Python
|
||||||
|
# Load the decoder once in the venv to prove the downloaded files are usable.
echo "Verifying SNAC decoder loads..."
# The model directory is handed over as sys.argv[1] instead of being
# interpolated into the Python source: a quote or backslash in the checkout
# path would otherwise produce a syntax error that is reported as a broken
# model. stderr stays suppressed, as before, so only the summary line prints.
if "$VENV/bin/python" - "$MODELS_DIR/snac_24khz" 2>/dev/null <<'PY'
import sys
import snac, torch
model = snac.SNAC.from_pretrained(sys.argv[1])
print(f'SNAC: {sum(p.numel() for p in model.parameters())/1e6:.1f}M parameters')
PY
then
    ok "SNAC decoder verified"
else
    fail "SNAC decoder failed to load. Delete models/snac_24khz/ and re-run."
fi
|
||||||
|
|
||||||
|
# ── 6. (Optional) Download Qwen3-TTS ────────────────────────
|
||||||
|
step "Qwen3-TTS 0.6B (optional, ~1.7 GB total)"

QWEN_TOKENIZER_DIR="$MODELS_DIR/Qwen3-TTS-Tokenizer-12Hz"
QWEN_MODEL_DIR="$MODELS_DIR/Qwen3-TTS-12Hz-0.6B-CustomVoice"

# Size of file $1 in bytes (BSD stat, then GNU stat); prints 0 when the file is
# missing so the result is always usable in a numeric comparison.
qwen_file_size() {
    stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null || echo 0
}

# Best-effort download of a small metadata file: $1 = URL, $2 = destination.
# A failed request is tolerated (as in the original `|| true`), but -f plus the
# cleanup ensures an HTTP error page is never left behind under the target name.
qwen_fetch_optional() {
    curl -k -fsSL -o "$2" "$1" 2>/dev/null || rm -f "$2"
}

# Mandatory download of a weights file: $1 = URL, $2 = destination. The data is
# written to "$2.part" and renamed on success, so an interrupted transfer never
# leaves a truncated file at the final path.
qwen_fetch_weights() {
    if curl -k -fL --progress-bar -o "$2.part" "$1"; then
        mv "$2.part" "$2"
    else
        rm -f "$2.part"
        fail "Download failed: $1"
    fi
}

# "Already downloaded" requires the weights, not just config.json: config.json
# is written first, so it also exists after an aborted model download. The
# 100 MB floor matches the completeness check used by download-tts-models.sh.
if [ -f "$QWEN_MODEL_DIR/config.json" ] \
    && [ "$(qwen_file_size "$QWEN_MODEL_DIR/model.safetensors")" -gt 100000000 ]; then
    ok "Qwen3-TTS already downloaded"
else
    echo "Qwen3-TTS 0.6B requires ~1.7 GB download (tokenizer + model)."
    echo "This is optional — Orpheus TTS (above) works without it."
    # `read` returns non-zero at EOF (non-interactive stdin); without the
    # fallback `set -e` would kill the script before the summary is printed.
    read -p "Download Qwen3-TTS? [y/N] " -n 1 -r || REPLY=""
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        # Tokenizer (~650 MB)
        echo "Downloading Qwen3-TTS Tokenizer (~650 MB)..."
        mkdir -p "$QWEN_TOKENIZER_DIR"
        for f in config.json configuration.json preprocessor_config.json; do
            qwen_fetch_optional "$HF_MIRROR/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f" \
                "$QWEN_TOKENIZER_DIR/$f"
        done
        qwen_fetch_weights "$HF_MIRROR/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors" \
            "$QWEN_TOKENIZER_DIR/model.safetensors"
        ok "Tokenizer downloaded"

        # Model
        echo "Downloading Qwen3-TTS 0.6B (~1.2 GB)..."
        mkdir -p "$QWEN_MODEL_DIR"
        for f in config.json generation_config.json; do
            qwen_fetch_optional "$HF_MIRROR/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f" \
                "$QWEN_MODEL_DIR/$f"
        done
        # config.json is what the rest of this script keys on; treat its
        # absence as a failed download rather than reporting success.
        if [ ! -f "$QWEN_MODEL_DIR/config.json" ]; then
            fail "Could not download Qwen3-TTS config.json from $HF_MIRROR."
        fi
        qwen_fetch_weights "$HF_MIRROR/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors" \
            "$QWEN_MODEL_DIR/model.safetensors"
        ok "Qwen3-TTS 0.6B downloaded"
    else
        warn "Skipped. You can re-run this script later to download."
    fi
fi
|
||||||
|
|
||||||
|
# ── Summary ──────────────────────────────────────────────────
|
||||||
|
step "Setup Complete"

echo ""
echo "Installed components:"
# Show the installed Orpheus model name (first column of `ollama list`). The
# previous `... | awk '{print $NF}' || echo 'ready'` printed the last word of
# the MODIFIED column, and its fallback could never run because the pipeline
# status is awk's, which is always 0. The default is applied explicitly here.
ORPHEUS_TAG=$(ollama list 2>/dev/null | awk '/orpheus/ && !seen++ {print $1}')
echo " Orpheus TTS (Ollama): ${ORPHEUS_TAG:-ready}"
echo " SNAC decoder: $MODELS_DIR/snac_24khz/"

# One consistent definition of "Qwen is installed" for both places below.
QWEN_INSTALLED=0
if [ -d "$QWEN_MODEL_DIR" ] && [ -f "$QWEN_MODEL_DIR/config.json" ]; then
    QWEN_INSTALLED=1
fi

if [ "$QWEN_INSTALLED" -eq 1 ]; then
    echo " Qwen3-TTS 0.6B: $QWEN_MODEL_DIR/"
else
    echo " Qwen3-TTS 0.6B: (not installed — re-run setup to add)"
fi
echo ""
echo "Disk usage:"
du -sh "$MODELS_DIR"/* 2>/dev/null | sed 's/^/ /'
echo ""
echo "Test commands:"
echo " $VENV/bin/python $SCRIPT_DIR/test_orpheus_tts.py"
# test_orpheus_tts.py writes its WAV files next to itself, so give the full
# path; the bare file name only worked when run from the script directory.
echo " afplay $SCRIPT_DIR/test_orpheus_tara.wav"
if [ "$QWEN_INSTALLED" -eq 1 ]; then
    echo " $VENV/bin/python $SCRIPT_DIR/test_qwen_tts.py"
fi
echo ""
echo "Voices: tara, leah, jess, leo, dan, mia, zac, zoe"
echo "Emotion: <laugh>, <chuckle>, <sigh>, <cough>, <groan>, <yawn>, <gasp>"
|
||||||
110
__LOCAL_LLMs/start-dashboard.sh
Executable file
110
__LOCAL_LLMs/start-dashboard.sh
Executable file
@ -0,0 +1,110 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# ============================================================
|
||||||
|
# Start Mission Control Dashboard + Ollama
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# bash start-dashboard.sh # start dashboard + ensure Ollama running
|
||||||
|
# bash start-dashboard.sh stop # stop dashboard
|
||||||
|
# bash start-dashboard.sh status # check status
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
DASHBOARD_DIR="$SCRIPT_DIR/dashboard"
|
||||||
|
PORT=3000
|
||||||
|
OLLAMA_URL="http://localhost:11434"
|
||||||
|
|
||||||
|
GREEN='\033[0;32m'
|
||||||
|
RED='\033[0;31m'
|
||||||
|
YELLOW='\033[1;33m'
|
||||||
|
NC='\033[0m'
|
||||||
|
# Status helpers: one coloured line each. `%b` expands the escape sequences in
# the colour variables exactly like `echo -e`.

# Success marker.
ok() {
    printf '%b\n' "${GREEN}✓${NC} $1"
}

# Warning marker.
warn() {
    printf '%b\n' "${YELLOW}⚠${NC} $1"
}

# Error marker. Unlike a typical "fail" helper this one only prints; callers
# decide whether to exit.
fail() {
    printf '%b\n' "${RED}✗${NC} $1"
}
|
||||||
|
|
||||||
|
case "${1:-start}" in
|
||||||
|
stop)
    echo "Stopping dashboard..."
    # Select only processes LISTENING on the port. A bare `lsof -ti :PORT`
    # also reports clients with an open connection to it (for example a
    # browser tab showing the dashboard), which would then be killed too.
    PIDS=$(lsof -ti "tcp:$PORT" -sTCP:LISTEN 2>/dev/null)
    if [ -n "$PIDS" ]; then
        # Deliberately unquoted: lsof prints one PID per line and kill needs
        # each PID as its own argument. Quoting passed a single multi-line
        # word, which kill rejects whenever more than one PID is returned.
        # shellcheck disable=SC2086
        kill $PIDS 2>/dev/null
        # shellcheck disable=SC2086,SC2116
        ok "Dashboard stopped (PID $(echo $PIDS))"
    else
        warn "Dashboard not running on port $PORT"
    fi
    exit 0
    ;;
|
||||||
|
|
||||||
|
status)
    echo "=== Status ==="

    # Ollama: reachable when /api/tags answers; the model count is parsed from
    # the same endpoint, with "?" as the fallback when parsing fails.
    if ! curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
        fail "Ollama not running"
    else
        MODELS=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
        ok "Ollama running ($MODELS models)"
    fi

    # Dashboard: reachable when the port answers an HTTP request.
    if ! curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
        fail "Dashboard not running"
    else
        ok "Dashboard running at http://localhost:$PORT"
    fi
    exit 0
    ;;
|
||||||
|
|
||||||
|
start)
    echo "=== Starting Mission Control ==="
    echo ""

    # 1. Ensure Ollama is running
    if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
        ok "Ollama already running"
    else
        echo "Starting Ollama..."
        ollama serve &>/dev/null &
        sleep 2
        if curl -s --max-time 2 "$OLLAMA_URL/api/tags" &>/dev/null; then
            ok "Ollama started"
        else
            # fail() in this script only prints, so the dashboard start
            # continues even when Ollama could not be brought up.
            fail "Could not start Ollama. Try: ollama serve"
        fi
    fi

    # 2. Check if dashboard already running
    if curl -s --max-time 2 "http://localhost:$PORT" &>/dev/null; then
        ok "Dashboard already running at http://localhost:$PORT"
        exit 0
    fi

    # 3. Install deps if needed
    # Stop early with a clear message when the dashboard checkout is missing,
    # instead of failing later inside `cd` and timing out after 15 s.
    if [ ! -d "$DASHBOARD_DIR" ]; then
        fail "Dashboard directory not found: $DASHBOARD_DIR"
        exit 1
    fi
    if [ ! -d "$DASHBOARD_DIR/node_modules" ]; then
        echo "Installing dependencies..."
        # Report success only when npm actually succeeded; previously
        # "Dependencies installed" was printed regardless of the exit status.
        if (cd "$DASHBOARD_DIR" && npm install --silent); then
            ok "Dependencies installed"
        else
            fail "npm install failed in $DASHBOARD_DIR"
            exit 1
        fi
    fi

    # 4. Start dashboard
    echo "Starting dashboard on port $PORT..."
    # Subshell + background: the dev server keeps running after this script
    # exits, and its output is discarded.
    (cd "$DASHBOARD_DIR" && npm run dev &>/dev/null &)

    # Wait for it to be ready (up to 15 probes, one second apart)
    for i in $(seq 1 15); do
        if curl -s --max-time 1 "http://localhost:$PORT" &>/dev/null; then
            ok "Dashboard ready at http://localhost:$PORT"
            echo ""
            echo "Open: http://localhost:$PORT"
            echo "Stop: bash start-dashboard.sh stop"
            exit 0
        fi
        sleep 1
    done

    fail "Dashboard did not start within 15s. Check: cd dashboard && npm run dev"
    exit 1
    ;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "Usage: bash start-dashboard.sh [start|stop|status]"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
189
__LOCAL_LLMs/test_orpheus_tts.py
Normal file
189
__LOCAL_LLMs/test_orpheus_tts.py
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
"""
|
||||||
|
Test Orpheus TTS via Ollama + SNAC decoder.
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
1. bash setup-tts.sh (one-shot: installs everything)
|
||||||
|
-- OR manually --
|
||||||
|
1. ollama pull sematre/orpheus:en
|
||||||
|
2. bash download-tts-models.sh snac (downloads SNAC via hf-mirror.com)
|
||||||
|
3. ollama serve (must be running)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
.venv-qwen-tts/bin/python test_orpheus_tts.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import struct
|
||||||
|
import wave
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
SNAC_MODEL_DIR = os.path.join(SCRIPT_DIR, "models", "snac_24khz")
|
||||||
|
OLLAMA_URL = "http://localhost:11434"
|
||||||
|
MODEL = "sematre/orpheus:en"
|
||||||
|
|
||||||
|
AUDIO_TOKEN_RE = re.compile(r"<custom_token_(\d+)>")
|
||||||
|
|
||||||
|
|
||||||
|
def check_ollama():
    """Return True when the Ollama API answers and lists the Orpheus model.

    Queries ``/api/tags`` with a 3-second timeout. Prints a hint and returns
    False when the model is not listed or when any error occurs while
    contacting or parsing the API.
    """
    try:
        tags_request = urllib.request.Request(f"{OLLAMA_URL}/api/tags")
        with urllib.request.urlopen(tags_request, timeout=3) as resp:
            listing = json.loads(resp.read())

        installed = [entry["name"] for entry in listing.get("models", [])]
        for installed_name in installed:
            # Substring match, so tagged variants of the model name also count.
            if MODEL in installed_name:
                return True

        print(f"ERROR: Model '{MODEL}' not found. Run: ollama pull {MODEL}")
        return False
    except Exception as e:
        print(f"ERROR: Cannot connect to Ollama at {OLLAMA_URL}: {e}")
        print("Run: ollama serve")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def check_snac():
    """Verify the SNAC decoder files are present on disk.

    Returns:
        True when ``SNAC_MODEL_DIR`` exists and contains both files that the
        setup scripts download into it (``config.json`` and
        ``pytorch_model.bin``); otherwise prints what is missing and returns
        False.

    Checking the directory alone is not enough: setup-tts.sh creates the
    directory before it downloads anything, so an aborted download leaves an
    empty directory that would pass and then fail later while loading.
    """
    if not os.path.isdir(SNAC_MODEL_DIR):
        print(f"ERROR: SNAC decoder not found at {SNAC_MODEL_DIR}")
        print("Run: bash setup-tts.sh (or: bash download-tts-models.sh snac)")
        return False

    required = ("config.json", "pytorch_model.bin")
    missing = [
        name for name in required
        if not os.path.isfile(os.path.join(SNAC_MODEL_DIR, name))
    ]
    if missing:
        print(f"ERROR: SNAC decoder at {SNAC_MODEL_DIR} is incomplete (missing: {', '.join(missing)})")
        print("Run: bash setup-tts.sh (or: bash download-tts-models.sh snac)")
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def load_snac():
    """Load the SNAC codec from ``SNAC_MODEL_DIR`` and return it in eval mode."""
    # Imported lazily so the prerequisite checks can run (and print their
    # hints) before these heavy packages are loaded.
    import torch
    import snac

    print(f"Loading SNAC decoder from {SNAC_MODEL_DIR}...")
    decoder = snac.SNAC.from_pretrained(SNAC_MODEL_DIR)
    decoder.eval()
    return decoder
|
||||||
|
|
||||||
|
|
||||||
|
def generate_tokens(text: str, voice: str = "tara") -> str:
    """Ask Ollama for the audio-token transcript of ``text``.

    Args:
        text: Sentence to synthesise.
        voice: Voice name placed in front of the text in the prompt.

    Returns:
        The raw ``response`` string of the non-streaming ``/api/generate``
        call (empty string when the field is absent).
    """
    sampling = {
        "temperature": 0.6,
        "top_p": 0.9,
        "repeat_penalty": 1.1,
        "num_predict": 10240,
        "stop": ["<|end_of_text|>"],
    }
    body = {
        "model": MODEL,
        "prompt": f"<custom_token_3><|begin_of_text|>{voice}: {text}<|eot_id|><custom_token_4><custom_token_5><custom_token_1>",
        "stream": False,
        "options": sampling,
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )

    print("Generating audio tokens via Ollama...")
    started = time.time()
    with urllib.request.urlopen(request, timeout=120) as resp:
        reply = json.loads(resp.read())
    elapsed = time.time() - started

    response_text = reply.get("response", "")
    # Counted only for the progress message; decoding re-parses the text.
    token_count = len(AUDIO_TOKEN_RE.findall(response_text))
    print(f"Generated {token_count} audio tokens in {elapsed:.1f}s")
    return response_text
|
||||||
|
|
||||||
|
|
||||||
|
def decode_tokens(response_text: str, snac_model) -> tuple:
    """Convert the audio tokens in an Ollama response into a waveform.

    Args:
        response_text: Text containing ``<custom_token_N>`` markers.
        snac_model: Object exposing ``decode([codes_0, codes_1, codes_2])``.

    Returns:
        ``(audio, 24000)`` where ``audio`` is the decoded NumPy array, or
        ``(None, 0)`` when the response holds no complete 7-token frame.

    Each frame consists of 7 tokens; the token in slot ``k`` of a frame encodes
    ``code + 10 + k * 4096``. Tokens whose decoded value falls outside
    ``[0, 4096)`` for the slot they would occupy (for example control tokens
    emitted around the audio) are skipped and do not advance the slot counter.
    Previously such a token shifted every following code into the wrong slot
    and produced negative or oversized indices for the decoder.
    """
    import torch

    tokens = AUDIO_TOKEN_RE.findall(response_text)
    if not tokens:
        print("ERROR: No audio tokens found in response")
        return None, 0

    frame_len = 7
    codebook_size = 4096  # same constant the per-slot offset is built from

    audio_ids = []
    skipped = 0
    for tok in tokens:
        # The slot is derived from the number of ACCEPTED codes, so a rejected
        # token cannot desynchronise the frame layout.
        slot = len(audio_ids) % frame_len
        code = int(tok) - 10 - slot * codebook_size
        if 0 <= code < codebook_size:
            audio_ids.append(code)
        else:
            skipped += 1
    if skipped:
        print(f"WARNING: Ignored {skipped} token(s) outside the audio code range")

    # Trim to multiple of 7
    audio_ids = audio_ids[: len(audio_ids) // frame_len * frame_len]
    if len(audio_ids) == 0:
        print("ERROR: Not enough audio tokens to decode")
        return None, 0

    audio_tensor = torch.tensor(audio_ids, dtype=torch.int32).reshape(-1, frame_len)

    # Split each frame into three code streams: slot 0 alone, slots 1 and 4
    # interleaved, and slots 2, 3, 5, 6 interleaved.
    codes_0 = audio_tensor[:, 0].unsqueeze(0)
    codes_1 = torch.stack((audio_tensor[:, 1], audio_tensor[:, 4])).t().flatten().unsqueeze(0)
    codes_2 = (
        torch.stack((audio_tensor[:, 2], audio_tensor[:, 3], audio_tensor[:, 5], audio_tensor[:, 6]))
        .t()
        .flatten()
        .unsqueeze(0)
    )

    print("Decoding audio...")
    with torch.inference_mode():
        audio_hat = snac_model.decode([codes_0, codes_1, codes_2])

    audio_np = audio_hat[0].squeeze().numpy()
    return audio_np, 24000
|
||||||
|
|
||||||
|
|
||||||
|
def save_wav(audio_np, sample_rate: int, path: str):
    """Write a float audio array to ``path`` as mono 16-bit PCM WAV.

    Samples are scaled by 32767, clipped to the int16 range and truncated to
    integers; a one-line summary with the clip duration is printed afterwards.
    """
    import numpy as np

    # Scale to the int16 range, clamp overshoots, then convert.
    scaled = audio_np * 32767
    pcm = np.clip(scaled, -32768, 32767).astype(np.int16)

    with wave.open(path, "w") as wav_file:
        wav_file.setnchannels(1)   # mono
        wav_file.setsampwidth(2)   # 2 bytes = 16 bit
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm.tobytes())

    duration = len(pcm) / sample_rate
    print(f"Saved {path} ({duration:.1f}s, {sample_rate} Hz)")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the Orpheus smoke test: synthesise two sentences and save them.

    Exits with status 1 (via ``SystemExit``) when a prerequisite is missing or
    when a sentence produced no audio, so shell callers can detect failure;
    previously every outcome ended with status 0.
    """
    print("=== Orpheus TTS Test (Ollama + SNAC) ===\n")

    if not check_ollama():
        raise SystemExit(1)
    if not check_snac():
        raise SystemExit(1)

    snac_model = load_snac()

    # Voices: tara, leah, jess, leo, dan, mia, zac, zoe
    tests = [
        ("Hello! This is Orpheus text to speech, running entirely on your Mac through Ollama.", "tara"),
        ("<laugh> That's amazing! Local AI speech generation without any cloud services!", "leo"),
    ]

    saved_paths = []
    for i, (text, voice) in enumerate(tests):
        print(f"\n--- Test {i+1}: voice={voice} ---")
        print(f"Text: {text[:80]}...")

        response = generate_tokens(text, voice)
        audio, sr = decode_tokens(response, snac_model)

        if audio is not None:
            # Output goes next to this script, independent of the caller's CWD.
            outpath = os.path.join(SCRIPT_DIR, f"test_orpheus_{voice}.wav")
            save_wav(audio, sr, outpath)
            saved_paths.append(outpath)

    if len(saved_paths) < len(tests):
        print(f"\nERROR: {len(tests) - len(saved_paths)} of {len(tests)} tests produced no audio.")
        raise SystemExit(1)

    print("\n=== Done! Open the .wav files to listen. ===")
    # Print the real location; the bare file name was only valid when the
    # script was started from its own directory.
    print(f"Play with: afplay {saved_paths[0]}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
84
__LOCAL_LLMs/test_qwen_tts.py
Normal file
84
__LOCAL_LLMs/test_qwen_tts.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
"""
|
||||||
|
Test Qwen3-TTS 0.6B on Apple Silicon (MPS or CPU fallback).
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
bash setup-tts.sh (one-shot: installs everything)
|
||||||
|
-- OR manually --
|
||||||
|
bash download-tts-models.sh (downloads models via hf-mirror.com)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
.venv-qwen-tts/bin/python test_qwen_tts.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import torch
|
||||||
|
import soundfile as sf
|
||||||
|
from qwen_tts import Qwen3TTSModel
|
||||||
|
|
||||||
|
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(SCRIPT_DIR, "models", "Qwen3-TTS-12Hz-0.6B-CustomVoice")
PREFERRED_SPEAKER = "Chelsie"

# Check model exists locally
if not os.path.isdir(MODEL_PATH):
    print(f"ERROR: Model not found at {MODEL_PATH}")
    print("Run: bash setup-tts.sh (or: bash download-tts-models.sh qwen)")
    raise SystemExit(1)

# Pick device: MPS if available, else CPU. float32 is used on both; the
# original note for the MPS branch was "MPS doesn't support bfloat16".
dtype = torch.float32
if torch.backends.mps.is_available():
    device = "mps"
    print("Using MPS (Apple Metal GPU)")
else:
    device = "cpu"
    print("Using CPU")

print(f"Loading Qwen3-TTS-12Hz-0.6B-CustomVoice on {device}...")
t0 = time.time()

model = Qwen3TTSModel.from_pretrained(
    MODEL_PATH,
    device_map=device,
    dtype=dtype,
)

print(f"Model loaded in {time.time() - t0:.1f}s")
supported_speakers = model.get_supported_speakers()
print(f"Supported speakers: {supported_speakers}")
print(f"Supported languages: {model.get_supported_languages()}")


def pick_speaker(preferred, supported):
    """Return ``preferred`` if the model lists it, else the first listed speaker.

    NOTE(review): the return type of get_supported_speakers() is not visible
    here; it is treated as an iterable of names, compared case-insensitively.
    If it cannot be iterated (or is empty) the preferred name is kept, which
    is the original behaviour.
    """
    try:
        names = [str(name) for name in supported]
    except TypeError:
        names = []
    if not names or any(name.lower() == preferred.lower() for name in names):
        return preferred
    print(f"WARNING: speaker '{preferred}' is not supported by this model; using '{names[0]}'")
    return names[0]


speaker = pick_speaker(PREFERRED_SPEAKER, supported_speakers)


def synthesize(label, text, filename, **extra):
    """Generate one English utterance and save it next to this script.

    ``extra`` is forwarded to ``generate_custom_voice`` (e.g. ``instruct=``).
    The file is written into SCRIPT_DIR rather than the current directory so
    the output lands in the same place as the Orpheus test output regardless
    of where the script is started from.
    """
    print(f"\n{label}: {text[:60]}...")
    started = time.time()
    wavs, sr = model.generate_custom_voice(
        text=text,
        language="English",
        speaker=speaker,
        **extra,
    )
    elapsed = time.time() - started
    print(f"Generated in {elapsed:.1f}s, sample rate={sr}, audio length={len(wavs[0])/sr:.1f}s")

    output_path = os.path.join(SCRIPT_DIR, filename)
    sf.write(output_path, wavs[0], sr)
    print(f"Saved to {output_path}")


# Test 1: English with a built-in speaker
synthesize(
    "Generating speech for",
    "Hello! Welcome to the local LLM dashboard. I am Qwen three T T S, running entirely on your Mac.",
    "test_output_english.wav",
)

# Test 2: English with emotion instruction
synthesize(
    "Generating with emotion",
    "This is absolutely incredible! I can't believe how well this works on a local machine!",
    "test_output_excited.wav",
    instruct="Speak with excitement and enthusiasm",
)

print("\nDone! Open the .wav files to listen.")
|
||||||
387
__LOCAL_LLMs/windows_specific/razer-blade-18-spec.md
Normal file
387
__LOCAL_LLMs/windows_specific/razer-blade-18-spec.md
Normal file
@ -0,0 +1,387 @@
|
|||||||
|
Here is a complete engineering-grade specification document for the exact configuration you shared:
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Razer Blade 18 (Model: RZ09-05299ER9-R3U1) — Detailed Specification Document
|
||||||
|
|
||||||
|
Manufacturer: Razer Inc.
|
||||||
|
Product Line: Blade Series
|
||||||
|
Model Number: RZ09-05299ER9-R3U1
|
||||||
|
Form Factor: High-performance desktop-class gaming & workstation laptop
|
||||||
|
Release Generation: RTX 50-series era (2025)
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
1. System Overview
|
||||||
|
|
||||||
|
The Razer Blade 18 is positioned as a flagship desktop-replacement laptop, integrating Intel Core Ultra HX processors, NVIDIA RTX 50-series GPUs, ultra-high refresh displays, and workstation-level memory/storage configurations. 
|
||||||
|
|
||||||
|
Primary Target Use Cases
|
||||||
|
• AAA gaming at maximum settings (4K, ray tracing)
|
||||||
|
• AI / ML model development (local inference, CUDA workloads)
|
||||||
|
• Software development & compilation
|
||||||
|
• 3D rendering, Unreal Engine, Blender
|
||||||
|
• Video editing (8K workflows)
|
||||||
|
• Desktop replacement workstation
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
2. CPU (Processor)
|
||||||
|
|
||||||
|
Processor: Intel® Core™ Ultra 9 275HX 
|
||||||
|
|
||||||
|
Architecture
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
CPU family Intel Core Ultra HX Series
|
||||||
|
Architecture Intel Arrow Lake-HX
|
||||||
|
Core design Hybrid architecture
|
||||||
|
Core types Performance cores + Efficient cores
|
||||||
|
Target TDP ~55W base (HX class), scalable to ~157W turbo
|
||||||
|
Fabrication TSMC N3B compute tile (multi-tile Foveros package)
|
||||||
|
Integrated AI accelerator Intel NPU (Neural Processing Unit)
|
||||||
|
|
||||||
|
Estimated core configuration (typical for Ultra 9 HX class)
|
||||||
|
|
||||||
|
Core type Count
|
||||||
|
Performance cores 8
|
||||||
|
Efficient cores 16
|
||||||
|
Total cores 24
|
||||||
|
Threads 24
|
||||||
|
|
||||||
|
AI acceleration
|
||||||
|
|
||||||
|
Integrated:
|
||||||
|
• Intel NPU
|
||||||
|
• AVX2 and AVX-VNNI support (AVX-512 is not available on this CPU)
|
||||||
|
• VNNI instructions
|
||||||
|
• Hardware AI acceleration support
|
||||||
|
|
||||||
|
Use cases:
|
||||||
|
• Local AI inference
|
||||||
|
• Background Copilot AI tasks
|
||||||
|
• AI-assisted workflows
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
3. GPU (Graphics)
|
||||||
|
|
||||||
|
Discrete GPU: NVIDIA GeForce RTX 5090 Laptop GPU 
|
||||||
|
VRAM: 24 GB GDDR7 VRAM 
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
GPU Architecture
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Architecture NVIDIA Blackwell (RTX 50-series)
|
||||||
|
Memory type GDDR7
|
||||||
|
VRAM size 24 GB
|
||||||
|
CUDA cores 10,496
|
||||||
|
Ray tracing cores 4th Gen RT cores
|
||||||
|
Tensor cores 5th Gen
|
||||||
|
PCIe interface PCIe Gen 5
|
||||||
|
DirectX support DirectX 12 Ultimate
|
||||||
|
Vulkan support Yes
|
||||||
|
OpenCL support Yes
|
||||||
|
CUDA support Yes
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
GPU Compute Capability
|
||||||
|
|
||||||
|
Feature Support
|
||||||
|
CUDA compute Yes
|
||||||
|
Tensor acceleration Yes
|
||||||
|
DLSS DLSS 4
|
||||||
|
Ray tracing Hardware accelerated
|
||||||
|
AI inference Excellent
|
||||||
|
Stable diffusion Excellent
|
||||||
|
Local LLM inference Excellent
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
AI / ML Capability Estimate
|
||||||
|
|
||||||
|
Model Expected Performance
|
||||||
|
Llama 3 8B Real-time
|
||||||
|
Llama 3 70B quantized Usable
|
||||||
|
Stable Diffusion XL Very fast
|
||||||
|
Whisper large Very fast
|
||||||
|
TensorRT inference Excellent
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
4. RAM (Memory)
|
||||||
|
|
||||||
|
Installed memory: 64 GB RAM 
|
||||||
|
Memory speed: 5600 MHz 
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Memory Details
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Capacity 64 GB
|
||||||
|
Type DDR5
|
||||||
|
Speed 5600 MHz
|
||||||
|
Channels Dual channel
|
||||||
|
ECC No
|
||||||
|
Upgradeability Yes (depends on configuration)
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Memory bandwidth estimate
|
||||||
|
|
||||||
|
~89.6 GB/sec theoretical peak (dual-channel DDR5-5600: 5600 MT/s × 8 bytes × 2 channels)
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
5. Storage
|
||||||
|
|
||||||
|
Installed storage: 4 TB SSD (2 TB + 2 TB) 
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Storage configuration
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Total capacity 4 TB
|
||||||
|
Drive type NVMe SSD
|
||||||
|
Interface PCIe Gen 4 or Gen 5
|
||||||
|
Configuration Dual SSD
|
||||||
|
RAID support Possible
|
||||||
|
Upgradeable Yes
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Storage performance estimate
|
||||||
|
|
||||||
|
Metric Expected
|
||||||
|
Sequential read 7,000–14,000 MB/sec
|
||||||
|
Sequential write 6,000–12,000 MB/sec
|
||||||
|
Random IOPS >1 million
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
6. Display
|
||||||
|
|
||||||
|
Display size: 18 inches 
|
||||||
|
Display modes: Dual mode UHD+ 240 Hz / FHD+ 440 Hz 
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Display detailed specifications
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Size 18 inches
|
||||||
|
Mode 1 resolution UHD+ (3840×2400)
|
||||||
|
Mode 2 resolution FHD+ (1920×1200)
|
||||||
|
Refresh rate (UHD+) 240 Hz
|
||||||
|
Refresh rate (FHD+) 440 Hz
|
||||||
|
Aspect ratio 16:10
|
||||||
|
Panel type IPS or Mini-LED
|
||||||
|
Adaptive sync Yes
|
||||||
|
Response time <3 ms (estimated)
|
||||||
|
HDR support Likely HDR 600–1000
|
||||||
|
Color gamut 100% DCI-P3
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
Dual-mode display explanation
|
||||||
|
|
||||||
|
Switchable between:
|
||||||
|
|
||||||
|
Mode Use case
|
||||||
|
UHD+ 240 Hz Visual quality, editing
|
||||||
|
FHD+ 440 Hz Competitive gaming
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
7. Operating System
|
||||||
|
|
||||||
|
OS: Windows 11 Home 
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
• DirectX 12 Ultimate
|
||||||
|
• WSL2
|
||||||
|
• CUDA
|
||||||
|
• AI frameworks
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
8. Cooling System
|
||||||
|
|
||||||
|
Advanced vapor chamber cooling system.
|
||||||
|
|
||||||
|
Expected features:
|
||||||
|
• Vapor chamber cooling
|
||||||
|
• Dual fan cooling
|
||||||
|
• Liquid metal thermal interface
|
||||||
|
• Advanced heat pipe network
|
||||||
|
|
||||||
|
Supports sustained:
|
||||||
|
• CPU ~120W+
|
||||||
|
• GPU ~175W+
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
9. Connectivity & Ports (Expected for Blade 18)
|
||||||
|
|
||||||
|
Typical Blade 18 includes:
|
||||||
|
|
||||||
|
USB
|
||||||
|
• 3× USB-A 3.2 Gen 2
|
||||||
|
• 2× USB-C (Thunderbolt 4 / USB4)
|
||||||
|
|
||||||
|
Video
|
||||||
|
• HDMI 2.1
|
||||||
|
• Thunderbolt video output
|
||||||
|
|
||||||
|
Network
|
||||||
|
• 2.5 Gb Ethernet
|
||||||
|
|
||||||
|
Audio
|
||||||
|
• 3.5 mm combo jack
|
||||||
|
|
||||||
|
Storage expansion
|
||||||
|
• Dual NVMe slots
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
10. Wireless Connectivity
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
|
||||||
|
Technology Support
|
||||||
|
Wi-Fi Wi-Fi 7
|
||||||
|
Bluetooth Bluetooth 5.4
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
11. Power System
|
||||||
|
|
||||||
|
Estimated:
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Power adapter 330W–400W
|
||||||
|
Battery ~90–99 Wh
|
||||||
|
Charging Fast charging supported
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
12. Keyboard
|
||||||
|
• Per-key RGB lighting
|
||||||
|
• Razer Chroma support
|
||||||
|
• Anti-ghosting
|
||||||
|
• Full keyboard with numpad
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
13. Build and Physical
|
||||||
|
|
||||||
|
Expected typical Blade 18 chassis:
|
||||||
|
|
||||||
|
Attribute Specification
|
||||||
|
Material CNC aluminum
|
||||||
|
Color Black
|
||||||
|
Thickness ~21–24 mm
|
||||||
|
Weight ~3.1 kg
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
14. AI / ML Workstation Capability Assessment
|
||||||
|
|
||||||
|
This laptop is excellent for:
|
||||||
|
|
||||||
|
Supported workloads
|
||||||
|
|
||||||
|
Workload Capability
|
||||||
|
CUDA development Excellent
|
||||||
|
Stable diffusion Excellent
|
||||||
|
LLM inference Excellent
|
||||||
|
PyTorch training Very good
|
||||||
|
TensorFlow Excellent
|
||||||
|
Unreal Engine Excellent
|
||||||
|
Blender Excellent
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
15. Gaming Performance Tier
|
||||||
|
|
||||||
|
Expected performance:
|
||||||
|
|
||||||
|
Resolution Performance
|
||||||
|
1080p Extreme
|
||||||
|
1440p Extreme
|
||||||
|
4K Excellent
|
||||||
|
|
||||||
|
Ray tracing:
|
||||||
|
• Excellent
|
||||||
|
|
||||||
|
DLSS 4:
|
||||||
|
• Supported
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
16. Storage Expansion Capability
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
• Dual NVMe SSD
|
||||||
|
• RAID configuration
|
||||||
|
• Upgradable storage
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
17. Price
|
||||||
|
|
||||||
|
Configured model price:
|
||||||
|
|
||||||
|
$5,199.99 USD 
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
18. Performance Tier Classification
|
||||||
|
|
||||||
|
Category Rating
|
||||||
|
Gaming Extreme
|
||||||
|
AI / ML Extreme
|
||||||
|
Software development Extreme
|
||||||
|
Video editing Extreme
|
||||||
|
Desktop replacement Extreme
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
19. Ideal Use Cases for You Specifically (Saravana)
|
||||||
|
|
||||||
|
Based on your AI engineering work:
|
||||||
|
|
||||||
|
Perfect for:
|
||||||
|
• Running multiple local LLMs
|
||||||
|
• CUDA inference pipelines
|
||||||
|
• Running LangGraph agents locally
|
||||||
|
• Running Stable Diffusion
|
||||||
|
• Running Whisper locally
|
||||||
|
• Running multiple Docker containers
|
||||||
|
• Unreal Engine / 3D
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
20. Summary
|
||||||
|
|
||||||
|
This configuration is:
|
||||||
|
|
||||||
|
One of the most powerful laptops currently available
|
||||||
|
|
||||||
|
Equivalent to:
|
||||||
|
• High-end workstation
|
||||||
|
• High-end desktop GPU
|
||||||
|
• Enterprise-grade development machine
|
||||||
|
|
||||||
|
⸻
|
||||||
|
|
||||||
|
If you want, I can also create:
|
||||||
|
|
||||||
|
• Full performance comparison vs desktop RTX 4090
|
||||||
|
• Or benchmark estimates for LLM / Stable Diffusion / coding workloads
|
||||||
|
• Or recommended optimal configuration for your home lab and multi-model setup
|
||||||
372
__LOCAL_LLMs/windows_specific/setup-guide.md
Normal file
372
__LOCAL_LLMs/windows_specific/setup-guide.md
Normal file
@ -0,0 +1,372 @@
|
|||||||
|
# Windows Setup Guide — Local LLM Stack on Razer Blade 18
|
||||||
|
|
||||||
|
> **Hardware:** Razer Blade 18 · Intel Core Ultra 9 275HX · RTX 5090 24 GB GDDR7 · 64 GB DDR5 · 4 TB NVMe
|
||||||
|
> **OS:** Windows 11 Home
|
||||||
|
> **Goal:** Mirror the macOS `__LOCAL_LLMs` stack — Ollama, Whisper, TTS (Orpheus + Qwen3), Mission Control dashboard
|
||||||
|
> **See also:** [razer-blade-18-spec.md](razer-blade-18-spec.md) for full hardware specs
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### 1. Windows Package Manager
|
||||||
|
|
||||||
|
**winget** ships with Windows 11, so it only needs to be verified. Optionally install **Scoop** for CLI tools:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Verify winget
|
||||||
|
winget --version
|
||||||
|
|
||||||
|
# Install Scoop (optional, useful for dev tools)
|
||||||
|
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||||
|
Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. NVIDIA CUDA Toolkit
|
||||||
|
|
||||||
|
The RTX 5090 needs the latest CUDA drivers for GPU-accelerated inference.
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Install NVIDIA drivers (latest Game Ready or Studio)
|
||||||
|
winget install --id Nvidia.GeForceExperience
|
||||||
|
|
||||||
|
# Install CUDA Toolkit (required for PyTorch CUDA)
|
||||||
|
winget install --id Nvidia.CUDA
|
||||||
|
# Or download from: https://developer.nvidia.com/cuda-downloads
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
nvidia-smi
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected output should show:
|
||||||
|
|
||||||
|
- **RTX 5090** with **24 GB** VRAM
|
||||||
|
- CUDA version 12.8 or newer (the minimum that supports RTX 50-series / Blackwell GPUs)
|
||||||
|
|
||||||
|
### 3. Node.js (for Mission Control Dashboard)
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install --id OpenJS.NodeJS.LTS
|
||||||
|
# Verify
|
||||||
|
node --version # should be 20.x+
|
||||||
|
npm --version
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Python 3.12
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install --id Python.Python.3.12
|
||||||
|
# Verify
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Git
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install --id Git.Git
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. ffmpeg
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install --id Gyan.FFmpeg
|
||||||
|
# Or: scoop install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Ollama — LLM Server
|
||||||
|
|
||||||
|
### Install
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install --id Ollama.Ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
Ollama for Windows runs as a background service and automatically uses CUDA (RTX 5090).
|
||||||
|
|
||||||
|
### Verify
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
ollama --version
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
```
|
||||||
|
|
||||||
|
### Download Models
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Coding
|
||||||
|
ollama pull qwen2.5-coder:32b # 19 GB — primary coding model
|
||||||
|
ollama pull qwen2.5-coder:7b # 4.7 GB — fast coding
|
||||||
|
|
||||||
|
# Reasoning
|
||||||
|
ollama pull deepseek-r1:32b # 19 GB — chain-of-thought
|
||||||
|
|
||||||
|
# General
|
||||||
|
ollama pull llama3.1:8b # 4.9 GB — fast general tasks
|
||||||
|
|
||||||
|
# TTS
|
||||||
|
ollama pull sematre/orpheus:en # 4 GB — text-to-speech (8 voices)
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
ollama list
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** With 24 GB VRAM, Ollama will offload 32B models almost entirely to GPU.
|
||||||
|
> On macOS (48 GB unified), the 32B models run in shared CPU/GPU memory.
|
||||||
|
> On this machine, **GPU inference will be significantly faster** for models that fit in 24 GB VRAM.
|
||||||
|
|
||||||
|
### VRAM Budget (RTX 5090 — 24 GB)
|
||||||
|
|
||||||
|
| Model | VRAM Usage | Fits in GPU? |
|
||||||
|
| ---------------------------- | ---------- | ------------ |
|
||||||
|
| llama3.1:8b | ~5 GB | ✅ Fully |
|
||||||
|
| qwen2.5-coder:7b | ~5 GB | ✅ Fully |
|
||||||
|
| sematre/orpheus:en | ~4 GB | ✅ Fully |
|
||||||
|
| qwen2.5-coder:32b | ~19 GB | ✅ Fully |
|
||||||
|
| deepseek-r1:32b | ~19 GB | ✅ Fully |
|
||||||
|
| Two 7B models simultaneously | ~10 GB | ✅ Both fit |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Whisper.cpp — Speech-to-Text
|
||||||
|
|
||||||
|
### Option A: Pre-built Binary (Recommended)
|
||||||
|
|
||||||
|
Download the latest release from GitHub:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Create whisper directory
|
||||||
|
mkdir "$env:USERPROFILE\whisper-cpp"
|
||||||
|
cd "$env:USERPROFILE\whisper-cpp"
|
||||||
|
|
||||||
|
# Download latest release (CUDA build)
|
||||||
|
# Check: https://github.com/ggerganov/whisper.cpp/releases
|
||||||
|
# Look for: whisper-cublas-bin-x64.zip or whisper-cuda-bin-x64.zip
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option B: Build from Source (CUDA)
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
|
cd whisper.cpp
|
||||||
|
cmake -B build -DGGML_CUDA=ON
|
||||||
|
cmake --build build --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
### Download Whisper Model
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
mkdir "$env:USERPROFILE\whisper-models"
|
||||||
|
|
||||||
|
# Download ggml-large-v3-turbo (1.5 GB)
|
||||||
|
curl.exe -L -o "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" `
|
||||||
|
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
> **No corporate proxy on this machine** — download directly from `huggingface.co`.
|
||||||
|
> The `hf-mirror.com` workaround is only needed on the corporate MacBook.
|
||||||
|
|
||||||
|
### Verify
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Test transcription
|
||||||
|
whisper-cli -m "$env:USERPROFILE\whisper-models\ggml-large-v3-turbo.bin" -f test.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. TTS — Orpheus + Qwen3-TTS
|
||||||
|
|
||||||
|
### 3a. Orpheus TTS (via Ollama)
|
||||||
|
|
||||||
|
Already handled in Step 1 (`ollama pull sematre/orpheus:en`).
|
||||||
|
|
||||||
|
### 3b. SNAC Decoder
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Create models directory (match macOS layout)
|
||||||
|
$MODELS = ".\models" # run from the __LOCAL_LLMs directory of your clone ($PSScriptRoot is empty in an interactive session)
|
||||||
|
mkdir "$MODELS\snac_24khz" -Force
|
||||||
|
|
||||||
|
# Download SNAC decoder
|
||||||
|
curl.exe -L -o "$MODELS\snac_24khz\config.json" `
|
||||||
|
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/config.json"
|
||||||
|
curl.exe -L -o "$MODELS\snac_24khz\pytorch_model.bin" `
|
||||||
|
"https://huggingface.co/hubertsiuzdak/snac_24khz/resolve/main/pytorch_model.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3c. Python Venv + Dependencies
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd __LOCAL_LLMs
|
||||||
|
|
||||||
|
# Create venv
|
||||||
|
python -m venv .venv-qwen-tts
|
||||||
|
|
||||||
|
# Activate (Windows uses Scripts, not bin)
|
||||||
|
.\.venv-qwen-tts\Scripts\Activate.ps1
|
||||||
|
|
||||||
|
# Install PyTorch with CUDA (NOT MPS — that's Apple only)
|
||||||
|
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128  # RTX 50-series (Blackwell, sm_120) needs CUDA 12.8+ wheels
|
||||||
|
|
||||||
|
# Install other deps
|
||||||
|
pip install snac numpy soundfile
|
||||||
|
|
||||||
|
# Verify CUDA
|
||||||
|
python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, Device: {torch.cuda.get_device_name(0)}')"
|
||||||
|
# Expected: CUDA: True, Device: NVIDIA GeForce RTX 5090 Laptop GPU
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3d. Qwen3-TTS 0.6B
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$MODELS = ".\models"
|
||||||
|
|
||||||
|
# Tokenizer (~650 MB)
|
||||||
|
mkdir "$MODELS\Qwen3-TTS-Tokenizer-12Hz" -Force
|
||||||
|
foreach ($f in @("config.json", "configuration.json", "preprocessor_config.json")) {
|
||||||
|
curl.exe -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\$f" `
|
||||||
|
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/$f"
|
||||||
|
}
|
||||||
|
curl.exe -L -o "$MODELS\Qwen3-TTS-Tokenizer-12Hz\model.safetensors" `
|
||||||
|
"https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz/resolve/main/model.safetensors"
|
||||||
|
|
||||||
|
# Model weights (~1.8 GB)
|
||||||
|
mkdir "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice" -Force
|
||||||
|
foreach ($f in @("config.json", "generation_config.json")) {
|
||||||
|
curl.exe -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\$f" `
|
||||||
|
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/$f"
|
||||||
|
}
|
||||||
|
curl.exe -L -o "$MODELS\Qwen3-TTS-12Hz-0.6B-CustomVoice\model.safetensors" `
|
||||||
|
"https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice/resolve/main/model.safetensors"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3e. Test TTS
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Activate venv
|
||||||
|
.\.venv-qwen-tts\Scripts\Activate.ps1
|
||||||
|
|
||||||
|
# Orpheus TTS test
|
||||||
|
python test_orpheus_tts.py
|
||||||
|
|
||||||
|
# Qwen3-TTS test
|
||||||
|
python test_qwen_tts.py
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Key difference from macOS:** Qwen3-TTS will use **CUDA** instead of MPS.
|
||||||
|
> In `test_qwen_tts.py`, the `torch.device("mps")` branch is skipped on Windows
|
||||||
|
> because `torch.backends.mps.is_available()` returns False there; confirm that the fallback selects CUDA rather than CPU.
|
||||||
|
> You may want to update the device logic to prefer CUDA:
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
|
> ```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Mission Control Dashboard
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd __LOCAL_LLMs\dashboard
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Start dev server
|
||||||
|
npm run dev
|
||||||
|
# Open http://localhost:3000
|
||||||
|
```
|
||||||
|
|
||||||
|
The dashboard is pure Next.js — works identically on Windows. The API routes auto-detect:
|
||||||
|
|
||||||
|
- **Ollama** at `localhost:11434`
|
||||||
|
- **Whisper** models in `%USERPROFILE%\whisper-models\`
|
||||||
|
- **TTS** engines (Orpheus, Qwen3-TTS) and Python venv
|
||||||
|
|
||||||
|
### Start Script (PowerShell)
|
||||||
|
|
||||||
|
Until a PowerShell start script exists, run the manual equivalent of `start-dashboard.sh`:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Quick start (manual)
|
||||||
|
ollama serve # if not already running as service
|
||||||
|
cd __LOCAL_LLMs\dashboard
|
||||||
|
npm run dev
|
||||||
|
```
|
||||||
|
|
||||||
|
> TODO: Create `start-dashboard.ps1` as a PowerShell equivalent of `start-dashboard.sh`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Key Differences: macOS vs Windows
|
||||||
|
|
||||||
|
| Area | macOS (M4 Pro 48 GB) | Windows (Razer Blade 18) |
|
||||||
|
| ------------------- | ----------------------------------- | ------------------------------------- |
|
||||||
|
| **GPU** | Apple Silicon (unified memory, MPS) | RTX 5090 (24 GB VRAM, CUDA) |
|
||||||
|
| **Ollama GPU** | Automatic (Metal) | Automatic (CUDA) |
|
||||||
|
| **VRAM** | Shared from 48 GB RAM | Dedicated 24 GB GDDR7 |
|
||||||
|
| **PyTorch device** | `mps` | `cuda` |
|
||||||
|
| **Whisper install** | `brew install whisper-cpp` | Build from source or download release |
|
||||||
|
| **Python venv** | `bin/activate` | `Scripts\Activate.ps1` |
|
||||||
|
| **Package manager** | Homebrew | winget / scoop |
|
||||||
|
| **Shell** | zsh / bash | PowerShell / cmd |
|
||||||
|
| **Scripts** | `.sh` (bash) | `.ps1` (PowerShell) |
|
||||||
|
| **Model download** | `hf-mirror.com` (corporate proxy) | `huggingface.co` (no proxy) |
|
||||||
|
| **Dashboard** | Identical | Identical |
|
||||||
|
| **Ollama models** | Identical | Identical |
|
||||||
|
|
||||||
|
### Performance Expectations
|
||||||
|
|
||||||
|
| Workload | macOS M4 Pro 48 GB | Razer RTX 5090 24 GB |
|
||||||
|
| --------------------------- | ---------------------------- | ------------------------- |
|
||||||
|
| qwen2.5-coder:32b inference | ~15–25 tok/s (MPS/CPU blend) | ~40–60 tok/s (full CUDA) |
|
||||||
|
| Whisper large-v3-turbo | ~2–4x realtime (CPU) | ~8–15x realtime (CUDA) |
|
||||||
|
| Orpheus TTS | ~realtime (CPU decode) | ~2–3x realtime (CUDA) |
|
||||||
|
| Qwen3-TTS | ~realtime (MPS) | ~2–4x realtime (CUDA) |
|
||||||
|
| 70B quantized models | Fits in 48 GB (slow) | Partially offloads to RAM |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. File Layout (Same as macOS)
|
||||||
|
|
||||||
|
```
|
||||||
|
__LOCAL_LLMs/
|
||||||
|
├── dashboard/ ← Mission Control (port 3000) — works as-is
|
||||||
|
├── models/ ← TTS model weights (gitignored)
|
||||||
|
│ ├── snac_24khz/
|
||||||
|
│ ├── Qwen3-TTS-Tokenizer-12Hz/
|
||||||
|
│ └── Qwen3-TTS-12Hz-0.6B-CustomVoice/
|
||||||
|
├── .venv-qwen-tts/ ← Python venv (Scripts\ on Windows)
|
||||||
|
├── test_orpheus_tts.py ← works as-is (device fallback)
|
||||||
|
├── test_qwen_tts.py ← update device to prefer CUDA
|
||||||
|
├── windows_specific/
|
||||||
|
│ ├── razer-blade-18-spec.md ← hardware spec
|
||||||
|
│ └── setup-guide.md ← this file
|
||||||
|
└── docs/ ← macOS-focused docs (still useful as reference)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Quick Reference — Full Setup Checklist
|
||||||
|
|
||||||
|
```
|
||||||
|
[ ] Install NVIDIA drivers + CUDA Toolkit
|
||||||
|
[ ] Install Ollama (winget install Ollama.Ollama)
|
||||||
|
[ ] Pull models: qwen2.5-coder:32b, qwen2.5-coder:7b, deepseek-r1:32b, llama3.1:8b, sematre/orpheus:en
|
||||||
|
[ ] Install Node.js 20+ (winget)
|
||||||
|
[ ] Install Python 3.12 (winget)
|
||||||
|
[ ] Install Git (winget)
|
||||||
|
[ ] Install ffmpeg (winget)
|
||||||
|
[ ] Clone repo
|
||||||
|
[ ] Download Whisper model to %USERPROFILE%\whisper-models\
|
||||||
|
[ ] Build or download whisper-cpp with CUDA
|
||||||
|
[ ] Create Python venv + install PyTorch CUDA + snac
|
||||||
|
[ ] Download SNAC decoder
|
||||||
|
[ ] Download Qwen3-TTS tokenizer + model
|
||||||
|
[ ] npm install in dashboard/
|
||||||
|
[ ] Run dashboard: npm run dev
|
||||||
|
[ ] Verify: http://localhost:3000 shows all green
|
||||||
|
```
|
||||||
Loading…
Reference in New Issue
Block a user