diff --git a/__LOCAL_LLMs/dashboard/docs/RICH_FEATURES_PRD.md b/__LOCAL_LLMs/dashboard/docs/RICH_FEATURES_PRD.md
index 10786f6a..fd6244fb 100644
--- a/__LOCAL_LLMs/dashboard/docs/RICH_FEATURES_PRD.md
+++ b/__LOCAL_LLMs/dashboard/docs/RICH_FEATURES_PRD.md
@@ -1,7 +1,7 @@
 # Rich LLM Interaction Features — Product Requirements Document
 
 > **Local LLM Workspace** — Evolving Mission Control from a model management dashboard into a full-featured local AI workspace.
-> Last updated: Feb 20, 2026
+> Last updated: Feb 20, 2026 (rev 2 — reviewed and expanded)
 >
 > See also: [DASHBOARD_PRD.md](DASHBOARD_PRD.md) · [DASHBOARD_ROADMAP.md](DASHBOARD_ROADMAP.md)
 
@@ -26,6 +26,29 @@ Mission Control today is a **model management dashboard** — load, unload, prom
 | Text-only input              | Multi-modal: voice (Whisper), vision, files, URLs      |
 | Single model per chat        | Multi-model orchestration (chain, race, vote)          |
 
+### Target Users
+
+| Persona                            | Description                                                                       | Primary needs                                                                                  |
+| ---------------------------------- | --------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- |
+| **Solo developer**                 | Uses LLMs for code review, debugging, generation daily. Has 3–5 models installed. | Fast switching between coding/reasoning models, Quick Actions for code tasks, file attachments |
+| **AI tinkerer**                    | Experiments with new models weekly. Compares outputs, benchmarks quality.         | Model router, race/vote orchestration, rating system, benchmarks                               |
+| **Privacy-conscious professional** | Lawyer, doctor, or researcher. Needs LLMs but cannot send data to cloud.          | 100% local, no telemetry, conversation export, project isolation                               |
+| **Writer / content creator**       | Uses LLMs for drafts, rewrites, brainstorming. Long multi-turn sessions.          | Persistent conversations, Writing Editor agent, branch/fork for variants                       |
+| **Power user / automation**        | Wants LLMs integrated into daily workflows. Cron jobs, shell integration.         | Scheduled tasks, command input sources, file output                                            |
+
+### Non-Goals (v4 Scope)
+
+The following are explicitly **out of scope** for v4. They may be revisited in v5+.
+
+- **Cloud model integration** — No OpenAI/Anthropic/Google API support. This is a local-only tool.
+- **Multi-user / auth** — Single-user, localhost only. No login, no sharing, no collaboration.
+- **Sandboxed code execution** — Running model-generated code is high-risk. Deferred to v5 with proper sandboxing.
+- **RAG / vector database** — Persistent agent memory across conversations requires a vector store (ChromaDB, etc.). Deferred.
+- **Mobile app** — No iOS/Android companion. LAN-based remote access is a v5 consideration.
+- **Plugin/extension API** — No third-party integrations or user-written tools. Deferred.
+- **Fine-tuning UI** — No model training or LoRA management. Use Ollama CLI directly.
+- **Web search** — No SearXNG or web scraping integration. Deferred.
+
 ---
 
 ## 2. Information Architecture
@@ -79,14 +102,17 @@ Conversations become the primary interaction unit — persistent, named, searcha
 #### Data Model
 
 ```typescript
+// Conversations and messages stored SEPARATELY in IndexedDB for query performance.
+// A conversation with 200 messages loads the header instantly; messages paginate.
+
 interface Conversation {
   id: string; // crypto.randomUUID()
-  title: string; // auto-generated, editable
-  model: string;
-  agentId?: string;
-  projectId?: string;
-  systemPrompt?: string;
-  messages: Message[];
+  title: string; // auto-generated from first message, user-editable
+  model: string; // primary model (last used)
+  agentId?: string; // if started from an Agent
+  projectId?: string; // folder grouping
+  systemPrompt?: string; // conversation-level override
+  messageCount: number; // denormalized for sidebar display
   createdAt: number;
   updatedAt: number;
   pinned: boolean;
@@ -95,31 +121,47 @@ interface Conversation {
     totalTokens: number;
     totalPrompts: number;
     avgTokPerSec: number;
-    models: string[]; // all models used
+    models: string[]; // all models used in this conversation
   };
 }
 
+// Stored in separate IndexedDB object store, indexed by [conversationId, timestamp]
 interface Message {
   id: string;
+  conversationId: string; // FK to Conversation.id
   role: 'user' | 'assistant' | 'system';
   content: string;
-  model?: string;
+  model?: string; // which model generated this response
   timestamp: number;
   attachments?: Attachment[];
-  metrics?: { tokensPerSec: number; totalTokens: number; durationMs: number };
-  rating?: 'up' | 'down';
-  parentId?: string; // for branching
-  branchIndex?: number;
+  metrics?: {
+    tokensPerSec: number;
+    totalTokens: number;
+    promptTokens: number; // input tokens consumed
+    durationMs: number;
+  };
+  rating?: 'up' | 'down'; // user feedback
+  parentId?: string; // for branching (unset = linear, set = branch root)
+  branchIndex?: number; // which variant (0, 1, 2...) when regenerated
 }
 
 interface Attachment {
   type: 'image' | 'file' | 'audio' | 'url';
   name: string;
-  data: string; // base64 or text content
+  data: string; // base64 for images, text content for files
   mimeType?: string;
+  size?: number; // bytes, for display
+  language?: string; // detected language for code files
 }
 ```
 
+> **Design decision:** Messages are stored in a separate IndexedDB object store (not inline in Conversation) so that:
+>
+> 1. Sidebar loads instantly (only conversation headers)
+> 2. Long conversations (200+ messages) paginate efficiently
+> 3. Full-text search can index messages independently
+> 4. Branch queries use the `[conversationId, parentId]` index
+
 #### Lifecycle
 
 1. **Create** — "New Conversation" button, Quick Action, or Agent launch
@@ -140,9 +182,32 @@ interface Attachment {
 
 #### Storage
 
-- **IndexedDB** via `idb` library (localStorage too small for full history)
+- **IndexedDB** via `idb` library (~1KB gzipped, thin wrapper over native API)
 - Migration from existing `llm-chat-*` and `llm-inference-log` on first v4 load
 - Export: `llm-conversations-backup-{date}.json`
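+- **Size budget:** IndexedDB storage is quota-limited per origin (the quota varies by browser and available disk space, and data may be evicted under storage pressure unless persistence is granted), so we cap at 500 conversations (auto-archive oldest). Individual messages have no cap.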
+
+#### Context Window Management
+
+Local models have limited context windows (typically 2K–128K tokens). The system must handle this gracefully:
+
+1. **Token counting:** Use a fast client-side tokenizer estimate (word count × 1.3 for English) to track approximate context usage per conversation.
+2. **Context bar:** Show a subtle progress bar in the conversation header: `4.2K / 32K tokens`.
+3. **Auto-truncation:** When context exceeds 80% of the model's window, show a warning. At 95%, automatically summarize older messages into a condensed system message (using the same model) and continue.
+4. **Manual truncation:** User can click "Compact history" to trigger summarization at any time.
+5. **Context-aware model switching:** If a conversation outgrows a small model's context, suggest switching to a model with a larger window.
+
+#### Acceptance Criteria
+
+- [ ] Conversations persist across page refreshes and browser restarts
+- [ ] Auto-title generates within 2s of first response using fast model
+- [ ] Sidebar shows conversations grouped by time period with correct counts
+- [ ] Full-text search returns results within 200ms for 500 conversations
+- [ ] Branch navigation shows `← 1 of 3 →` arrows on regenerated messages
+- [ ] Conversations exportable as Markdown with metadata header
+- [ ] Context usage bar visible in conversation header
+- [ ] Migration from v3 localStorage data completes without data loss
+- [ ] Archived conversations hidden from sidebar but searchable
 
 ---
 
@@ -230,9 +295,28 @@ interface ModelDefaults {
 
 Auto-detection: scan installed models, match by name pattern. User overrides in Settings.
 
-#### Command Palette
+#### Command Palette (`Cmd+K`)
 
-`Cmd+K` opens a fuzzy-search palette across all Quick Actions, Agents, and recent conversations. Under 100ms response time.
+The command palette is the power-user entry point. It unifies access to everything:
+
+- **Quick Actions** — fuzzy search by name or category
+- **Agents** — start conversation with any agent
+- **Recent conversations** — resume by title search
+- **Models** — switch model mid-conversation
+- **System commands** — "Export", "Settings", "Mission Control"
+
+Opens in <100ms. Results ranked by: exact match > starts-with > fuzzy > recently used.
+
+#### Acceptance Criteria
+
+- [ ] 30+ built-in Quick Actions across 5 categories
+- [ ] `Cmd+K` command palette opens in <100ms with fuzzy search
+- [ ] Quick Action creates new conversation with pre-filled system prompt and template
+- [ ] User can create, edit, duplicate, and delete custom Quick Actions
+- [ ] Model hint resolves to actual installed model (fallback: first loaded > first installed)
+- [ ] Usage tracking surfaces top 5 "frequently used" actions in sidebar
+- [ ] Actions included in settings export/import (F29 compatibility)
+- [ ] Template placeholders `{...}` highlighted in the input for easy replacement
 
 ---
 
@@ -262,12 +346,12 @@ interface Agent {
 }
 
 type AgentTool =
-  | 'file_read'
-  | 'vision'
-  | 'voice_input'
-  | 'web_search'
-  | 'code_execution'
-  | 'memory';
+  | 'file_read' // v4: read attached files and include in context
+  | 'vision' // v4: process images via vision-capable models
+  | 'voice_input' // v4: Whisper transcription input
+  | 'web_search' // v5 (deferred): local web search
+  | 'code_execution' // v5 (deferred): sandboxed code runner
+  | 'memory'; // v5 (deferred): persistent memory across conversations
 ```
 
 #### Built-in Agents (10)
@@ -295,7 +379,18 @@ type AgentTool =
 
 #### Agent Editor
 
-Full-screen editor: icon, name, model, temperature, system prompt (large textarea), welcome message, example prompts (tag input), tool toggles, constraints list.
+Full-screen editor: icon, name, model, temperature, system prompt (large textarea), welcome message, example prompts (tag input), tool toggles (v4 tools only), constraints list.
+
+#### Acceptance Criteria
+
+- [ ] 10 built-in agents covering code, writing, reasoning, creative, security
+- [ ] Agent editor with all fields persisted to IndexedDB
+- [ ] Agent conversations show badge in header and welcome message
+- [ ] Example prompts rendered as clickable chips below input bar
+- [ ] Agents included in settings export/import
+- [ ] Agent conversation count tracked and displayed in agent picker
+- [ ] Custom agents can be duplicated from built-in agents as starting point
+- [ ] Deleting an agent does NOT delete its conversations (conversations become "unlinked")
 
 ---
 
@@ -304,20 +399,98 @@ Full-screen editor: icon, name, model, temperature, system prompt (large textare
 Heuristic-based classifier picks the best model for the task — no LLM call, runs in <5ms.
 
 ````typescript
-function classifyTask(input: string): TaskType {
+type TaskType =
+  | 'code'
+  | 'code_review'
+  | 'debugging'
+  | 'reasoning'
+  | 'math'
+  | 'simple'
+  | 'translation'
+  | 'creative'
+  | 'general';
+
+function classifyTask(input: string, attachments?: Attachment[]): TaskType {
+  // Priority 1: Attachments override text classification
+  if (attachments?.some(a => a.type === 'image')) return 'code'; // vision model handles
+
   const lower = input.toLowerCase();
-  if (/```|function |class |const |import |def |=>/.test(input)) return 'code';
-  if (/review|refactor|debug|fix|bug/.test(lower)) return 'code_review';
-  if (/error:|traceback|exception|crash/.test(lower)) return 'debugging';
-  if (/think.*through|step.by.step|analyze|compare|why/.test(lower)) return 'reasoning';
-  if (/calculate|math|equation|proof/.test(lower)) return 'math';
-  if (input.length < 60) return 'simple';
-  if (/translate|translation/.test(lower)) return 'translation';
+
+  // Priority 2: Structural signals (code blocks, error traces)
+  if (/```|function |class |const |import |def |=>|\{\s*\n/.test(input)) return 'code';
+  if (/error:|traceback|exception|crashed?|ENOENT|SIGTERM/.test(lower)) return 'debugging';
+
+  // Priority 3: Intent signals
+  if (/review|refactor|optimize|lint/.test(lower)) return 'code_review';
+  if (/debug|fix.*bug|why.*(fail|break|wrong)/.test(lower)) return 'debugging';
+  if (/think.*through|step.by.step|analyze|compare|pros.*cons|trade.?off/.test(lower))
+    return 'reasoning';
+  if (/calculate|math|equation|proof|probability|statistic/.test(lower)) return 'math';
+  if (/translate|translation|in (spanish|french|german|japanese|chinese|korean)/.test(lower))
+    return 'translation';
+  if (/brainstorm|creative|story|poem|write.*about|imagine/.test(lower)) return 'creative';
+
+  // Priority 4: Length heuristic
+  if (input.length < 80) return 'simple';
+
   return 'general';
 }
+
+function resolveModel(
+  taskType: TaskType,
+  defaults: ModelDefaults,
+  loadedModels: string[],
+  installedModels: string[]
+): { model: string; reason: string; loaded: boolean } {
+  const preferred = defaults[taskTypeToHint(taskType)];
+
+  // Best case: preferred model is already loaded in RAM
+  if (loadedModels.includes(preferred)) {
+    return { model: preferred, reason: `${taskType} detected`, loaded: true };
+  }
+
+  // Fallback 1: any loaded model that matches the hint category
+  const loadedMatch = loadedModels.find(m => matchesHint(m, taskTypeToHint(taskType)));
+  if (loadedMatch) {
+    return {
+      model: loadedMatch,
+      reason: `${taskType} detected (using loaded model)`,
+      loaded: true,
+    };
+  }
+
+  // Fallback 2: preferred model exists but needs loading
+  if (installedModels.includes(preferred)) {
+    return { model: preferred, reason: `${taskType} detected (will load model)`, loaded: false };
+  }
+
+  // Fallback 3: first loaded model
+  if (loadedModels.length > 0) {
+    return { model: loadedModels[0], reason: 'Using currently loaded model', loaded: true };
+  }
+
+  // Fallback 4: first installed model
+  return { model: installedModels[0], reason: 'Using first available model', loaded: false };
+}
 ````
 
-**UI:** Model chip shows `🤖 Auto` → resolves on send → "Routed to deepseek-r1:32b (reasoning detected)". Click to override. Configurable defaults in Settings.
+**UI behavior:**
+
+- **Auto mode (default):** Model chip shows `🤖 Auto` → resolves on send → toast: "Routed to deepseek-r1:32b (reasoning detected)"
+- **Manual override:** Click model chip to pick a specific model (sticky for that conversation)
+- **Explanation tooltip:** Hover over routed model shows classification reason
+- **Load warning:** If the selected model isn't loaded, show "Model will be loaded (~8s)" before sending
+- **Settings:** Configure model defaults per task type in Settings → Model Router
+
+#### Acceptance Criteria
+
+- [ ] Heuristic classifier runs in <5ms (no LLM call, pure regex)
+- [ ] Auto mode shown as default for new conversations (not Agent conversations)
+- [ ] Routing decision explained on hover tooltip
+- [ ] User can override per-conversation; override persists
+- [ ] Model defaults configurable in Settings (5 categories: fast, coding, reasoning, chat, vision)
+- [ ] Auto-detection fills defaults on first run by scanning installed model names
+- [ ] Graceful fallback chain: preferred loaded → category loaded → preferred installed → any loaded → any installed
 
 ---
 
@@ -340,7 +513,19 @@ Rich input bar replacing the single textarea:
 | **Paste error**                       | Auto-detect stack trace, suggest "Debug Error" |
 | **Drag-and-drop**                     | Drop zone overlay, adds as attachment          |
 
-**Voice:** Click 🎤 or `Cmd+Shift+V` → record → `/api/whisper/transcribe` → fills textarea.
+**Voice:** Click 🎤 or `Cmd+Shift+V` → record via the MediaStream Recording API (`MediaRecorder`) → POST to `/api/whisper/transcribe` → transcription fills textarea → user can edit before sending.
+
+#### Acceptance Criteria
+
+- [ ] File picker supports `.ts`, `.py`, `.go`, `.rs`, `.js`, `.json`, `.md`, `.txt`, `.csv`, `.yaml`
+- [ ] Code files show syntax-highlighted collapsible preview before sending
+- [ ] Images display as thumbnails with file size; auto-select vision model if not already selected
+- [ ] Audio files sent to Whisper; transcription fills input (requires Whisper.cpp installed)
+- [ ] Paste image from clipboard (Cmd+V) creates inline attachment
+- [ ] Paste code auto-detected and wrapped in fenced code block with language
+- [ ] Drag-and-drop shows visual overlay; supports multiple files
+- [ ] Attachment size limit: 10MB per file, configurable in Settings
+- [ ] Graceful error if Whisper not installed: "Whisper.cpp not found — install via brew"
 
 ---
 
@@ -376,7 +561,23 @@ Regenerate keeps original (dimmed), streams new variant, shows `← 1 of 2 →`
 
 #### Rating → Model Quality Profile
 
-👍/👎 per response → aggregate per model → show in model cards: "87% positive on code tasks"
+👍/👎 per response → aggregate per model per task type → stored in IndexedDB `modelRatings` store.
+
+Display in Mission Control model cards: "87% positive on code tasks (23 ratings)".
+Surface in model router: prefer models with higher ratings for their category.
+
+#### Acceptance Criteria
+
+- [ ] Per-message action bar appears on hover (not always visible, to avoid clutter)
+- [ ] Copy button copies full message text (markdown source, not rendered HTML)
+- [ ] Per-code-block copy button copies only the code (not language label or fences)
+- [ ] Regenerate creates branch; original preserved with dimmed styling
+- [ ] Branch navigation `← 1 of N →` shown when message has multiple variants
+- [ ] "Try with other model" dropdown shows currently loaded models
+- [ ] Live token counter and tok/s visible during streaming
+- [ ] Stop button prominently visible during streaming (red, larger than current)
+- [ ] 👍/👎 persisted per message; aggregated view in Mission Control model cards
+- [ ] Rating data included in settings export
 
 ---
 
@@ -408,23 +609,123 @@ interface ScheduledTask {
 }
 ```
 
-**Built-in templates:** Morning Brief, Git Diff Summary, Dependency Audit, README Updater, Daily Learning Prompt.
+#### Built-in Templates
 
-**Constraints:** Dashboard tab must be open (browser cron). Optional `launchd` background service for macOS. 60s timeout per task. Loads model if needed.
+| Template             | Schedule      | Input Source       | Output       | Use Case                                  |
+| -------------------- | ------------- | ------------------ | ------------ | ----------------------------------------- |
+| **Morning Brief**    | Weekdays 8 AM | Static prompt      | Notification | "What should a developer focus on today?" |
+| **Git Diff Summary** | Weekdays 5 PM | `git diff --stat`  | Conversation | Summarize today's code changes            |
+| **Dependency Audit** | Mondays 10 AM | `npm audit --json` | Conversation | Weekly security scan summary              |
+| **README Freshness** | 1st of month  | File: `README.md`  | File output  | Monthly README review                     |
+| **Daily Learning**   | Daily 12 PM   | Static prompt      | Notification | "Teach me one useful programming concept" |
+
+#### Runtime Constraints
+
+- **Browser-based:** Uses `setInterval` with cron matching. Dashboard tab **must be open** for tasks to fire.
+- **Background alternative (future):** Optional Node.js process managed via macOS `launchd` plist. Would allow tasks to run when dashboard is closed. Deferred to v4.1.
+- **Model loading:** Task checks if model is loaded. If not, it sends a load request, waits up to 30s, then runs. After the run, the model is unloaded again if it was not already loaded before the task started.
+- **Timeout:** 60s max per task by default (configurable in Settings → Scheduled Tasks). Abort and log error if exceeded.
+- **Notification:** Uses browser `Notification` API (requests permission on first task creation).
+- **Security:** Shell command input source is opt-in per task. Only `stdout` is captured (not `stderr`). Commands run via `execFile` (no shell injection).
+
+#### Task Editor UI
+
+Modal editor with sections: name, schedule (preset picker + custom cron input with human-readable preview), model selector, input source radio (static / shell command / file path / clipboard), prompt textarea with `{input}` placeholder, output action dropdown, enable/disable toggle, "Run Now" test button.
+
+#### Acceptance Criteria
+
+- [ ] Cron expression validates and shows human-readable schedule ("Every weekday at 9:00 AM")
+- [ ] Tasks fire when dashboard tab is open and schedule matches (checked every 60s)
+- [ ] "Run Now" button for immediate testing with result preview
+- [ ] Run history shows last 10 executions with duration, token count, and success/error
+- [ ] Shell command input captures stdout safely via server-side API route
+- [ ] Browser notification on task completion (with permission request)
+- [ ] 5 built-in templates included as starting points
+- [ ] Tasks included in settings export/import
+- [ ] Disabled tasks don't fire but are preserved
 
 ---
 
 ### 3.8 Multi-Model Orchestration
 
-Three modes for using multiple models together:
+Three modes for combining multiple models on a single task.
 
-**Chain** — Sequential pipeline: model A reasons → model B codes from that reasoning.
+#### Chain Mode — Sequential Pipeline
 
-**Race** — Same prompt to N models simultaneously. Show all results with timing. User picks best.
+Output of model A becomes input to model B. Each step can have its own system prompt.
 
-**Vote** — 3+ models answer, then a synthesizer model produces consensus with disagreement notes.
+```
+User prompt
+  → Step 1: deepseek-r1:32b (system: "Analyze the problem and plan an approach")
+  → Step 2: qwen2.5-coder:32b (system: "Implement the plan from the previous step")
+  → Final response shown to user (with expandable intermediate steps)
+```
 
-Orchestrations can be saved as reusable Quick Actions.
+**Use cases:** Architecture decision → implementation, problem analysis → solution code, outline → full text.
+
+#### Race Mode — Parallel Competition
+
+Same prompt sent to N models simultaneously. All results displayed side-by-side with timing metrics.
+
+```
+User prompt
+  → llama3.1:8b    (3.2s, 42 tok/s)  ← fastest
+  → qwen2.5:7b     (4.1s, 38 tok/s)
+  → mistral:7b     (3.8s, 35 tok/s)
+User picks the best response (or takes all)
+```
+
+**Use cases:** Comparing model quality on a task, finding fastest model, choosing best explanation.
+
+#### Vote Mode — Consensus
+
+Same prompt to 3+ models. A designated synthesizer model reads all outputs and produces a consensus, noting disagreements.
+
+```
+User prompt → 3 models answer independently
+  → Synthesizer (reasoning model) reads all 3:
+     "All models agree on X. Models A and B suggest Y, but Model C
+      disagrees because Z. My recommendation: ..."
+```
+
+**Use cases:** Important decisions, fact verification, reducing hallucination risk.
+
+#### Data Model
+
+```typescript
+interface Orchestration {
+  id: string;
+  name: string; // "Reason then Code"
+  mode: 'chain' | 'race' | 'vote';
+  steps: OrchestrationStep[]; // chain: sequential; race/vote: parallel
+  synthesizer?: string; // vote mode: model that produces consensus
+  description?: string;
+}
+
+interface OrchestrationStep {
+  model: string; // or modelHint for flexibility
+  systemPrompt?: string;
+  transformInput?: string; // chain mode: template with {prev} placeholder
+}
+```
+
+Orchestrations are saved as a special type of Quick Action (category: `orchestration`) and appear in the command palette.
+
+#### UI
+
+- **Chain:** Show intermediate steps in collapsible blocks, final answer prominent
+- **Race:** Side-by-side cards with model name, timing, response. "Pick winner" button.
+- **Vote:** Individual responses in tabs, consensus answer at top with attribution
+
+#### Acceptance Criteria
+
+- [ ] Chain mode passes output of step N as `{prev}` placeholder to step N+1
+- [ ] Race mode streams all models in parallel with independent progress indicators
+- [ ] Vote mode synthesizer receives all responses in a structured prompt
+- [ ] All modes show timing metrics per model
+- [ ] Orchestrations saveable as reusable Quick Actions
+- [ ] At least 2 models must be installed for orchestration to be available (Vote mode requires at least 3)
+- [ ] Chain limited to 5 steps max; Race/Vote limited to 5 models max
 
 ---
 
@@ -443,11 +744,22 @@ interface Project {
 }
 ```
 
-- **System context per project** injected into every conversation
-- **Default model/agent** per project
-- Drag conversations into projects
-- `Cmd+P` project switcher
-- Project-scoped search
+- **System context per project:** Injected as a system message into every conversation in the project. Example: "You are helping with a TypeScript/Fastify backend using Azure Cosmos DB and Zod validation."
+- **Default model/agent:** New conversations in the project inherit these defaults
+- **Drag to organize:** Drag conversations from sidebar into project folders
+- **`Cmd+P` project switcher:** Quick switch between projects, filters sidebar to show only that project's conversations
+- **Project-scoped search:** Search only within a project's conversations
+- **Project stats:** Show total conversations, total tokens, most-used model
+
+#### Acceptance Criteria
+
+- [ ] Create/edit/delete projects with name, icon, description, system context
+- [ ] Project-level model and agent defaults apply to new conversations
+- [ ] Drag-and-drop conversations into/between projects
+- [ ] `Cmd+P` opens project switcher with search
+- [ ] Project system context visible as a badge in conversation header
+- [ ] Removing a conversation from a project moves it back to "Uncategorized"
+- [ ] Projects included in settings export/import
 
 ---
 
@@ -471,24 +783,117 @@ interface Project {
 
 ---
 
-## 5. Storage Architecture
+## 5. Settings Expansion
+
+```
+Settings
+├── General
+│   ├── Theme (dark / light / auto)
+│   ├── Sidebar default state (expanded / collapsed)
+│   ├── Default view on launch (last conversation / new conversation / mission control)
+│   ├── Auto-title conversations (on/off, which model to use)
+│   └── Conversation list density (compact / comfortable)
+│
+├── Model Router
+│   ├── Default model per task type (fast, coding, reasoning, chat, vision)
+│   ├── Auto-routing enabled (on / off, default: on)
+│   └── Show routing explanation (on / off)
+│
+├── Models
+│   ├── Model nicknames (display name overrides, e.g., "deepseek-r1:32b" → "DeepSeek R1")
+│   ├── Per-model default temperature
+│   └── Model notes (free-text per model, shown in expanded details)
+│
+├── Input
+│   ├── Voice input: Whisper model selection (if multiple installed)
+│   ├── Auto-detect paste type (on / off)
+│   └── Max attachment size (default: 10MB)
+│
+├── Scheduled Tasks
+│   ├── Default timeout per task (30s / 60s / 120s)
+│   └── Browser notification preferences
+│
+├── Data
+│   ├── Export all data (conversations + agents + actions + tasks + settings)
+│   ├── Import data (validates, merges, reports conflicts)
+│   ├── Clear all conversations (keep agents + actions)
+│   ├── Clear inference log
+│   └── Factory reset (clear everything, confirm dialog)
+│
+└── About
+    ├── Dashboard version
+    ├── Ollama version (from existing N10 feature)
+    ├── Storage usage (IndexedDB size + localStorage size)
+    └── Model disk usage (from existing system panel)
+```
+
+Settings persist to localStorage (same `llm-*` prefix convention from v3). Complex data (conversations, messages, agents) is stored in IndexedDB.
+
+---
+
+## 6. New API Routes
+
+The existing API routes remain unchanged. The following routes are used by v4 features; routes marked "Already exists" are listed for reference, and the rest are new:
+
+| Method | Route                   | Purpose                                                       | Feature             |
+| ------ | ----------------------- | ------------------------------------------------------------- | ------------------- |
+| POST   | `/api/ollama/chat`      | Already exists                                                | 3.1 Conversations   |
+| POST   | `/api/ollama/stream`    | Already exists (updated for images in Phase 5)                | 3.5 Multi-Modal     |
+| POST   | `/api/ollama/title`     | Generate conversation title via fast model                    | 3.1 Auto-title      |
+| POST   | `/api/ollama/summarize` | Summarize older messages for context compaction               | 3.1 Context Mgmt    |
+| POST   | `/api/system/exec`      | Execute shell command (for scheduled tasks)                   | 3.7 Cron            |
+| GET    | `/api/system/file`      | Read file content by path (for scheduled tasks + attachments) | 3.7 Cron, 3.5 Input |
+
+**Security notes:**
+
+- `/api/system/exec` is the most sensitive route. Validate the command's executable against an allowlist of safe executables (`git`, `npm`, `brew`, `cat`, `ls`, `wc`, `du`, `df`) and reject everything else, so `rm`, `mv`, `chmod`, `sudo`, `curl`, and `wget` are blocked by default. The user can extend the allowlist in Settings.
+- `/api/system/file` reads files for context. Limit to configurable base directories. No traversal above home directory.
+- Both routes are localhost-only (same as all existing routes).
+
+---
+
+## 7. Storage Architecture
 
 ### Migration: localStorage → IndexedDB
 
 ```
-IndexedDB (via idb):
-  conversations, messages, agents, quickActions, projects, scheduledTasks, taskRuns
+IndexedDB stores (via idb library, ~1KB gzipped):
+  conversations    ← pk: id, indexes: [updatedAt], [projectId], [archived]
+  messages         ← pk: id, indexes: [conversationId+timestamp], [conversationId+parentId]
+  agents           ← pk: id
+  quickActions     ← pk: id, indexes: [category], [usageCount]
+  projects         ← pk: id
+  scheduledTasks   ← pk: id
+  taskRuns         ← pk: auto, indexes: [taskId+timestamp]
+  modelRatings     ← pk: auto, indexes: [model+taskType]
+  orchestrations   ← pk: id
 
-localStorage (lightweight settings):
+localStorage (lightweight, synchronous access):
   llm-theme, llm-model-defaults, llm-model-sort, llm-auto-load-model,
-  llm-sidebar-state, llm-last-conversation, llm-model-benchmarks
+  llm-sidebar-state, llm-last-conversation, llm-model-benchmarks,
+  llm-settings-*, llm-command-allowlist
 ```
 
-Migration on first v4 load: existing `llm-inference-log` and `llm-chat-*` entries → IndexedDB conversations.
+### Migration Strategy (v3 → v4)
+
+1. On first v4 load, detect existing `llm-*` localStorage keys
+2. Convert `llm-inference-log` entries into Conversation + Message records in IndexedDB
+3. Convert `llm-chat-{model}` entries into Conversation + Message records
+4. Migrate `llm-prompt-history` into a "Prompt History" conversation
+5. Keep `llm-theme`, `llm-model-sort`, `llm-auto-load-model`, `llm-model-benchmarks` in localStorage
+6. Set `llm-migrated-v4: true` flag to prevent re-migration
+7. **Do NOT delete** old keys until user confirms migration was successful
+
+### Size Management
+
+- **Conversation cap:** 500 active conversations. Beyond that, oldest auto-archived.
+- **Message pagination:** Load 50 messages at a time when scrolling up.
+- **Attachment storage:** Images stored as base64 in message data. For conversations with many images, offer "Export & clean attachments" option.
+- **Periodic cleanup:** Prompt user quarterly to archive/delete old conversations if count > 300.
 
 ---
 
-## 6. Component Architecture (v4)
+## 8. Component Architecture (v4)
 
 ```
 src/app/
@@ -524,7 +929,7 @@ src/app/
 
 ---
 
-## 7. Implementation Phases
+## 9. Implementation Phases
 
 | Phase     | Focus                      | Tasks                                                                           | Effort    | Depends On |
 | --------- | -------------------------- | ------------------------------------------------------------------------------- | --------- | ---------- |
@@ -541,61 +946,121 @@ src/app/
 
 ---
 
-## 8. Competitive Differentiation
+## 10. Competitive Differentiation
 
-| Feature             | ChatGPT      | Claude       | **Local (Ours)**               |
-| ------------------- | ------------ | ------------ | ------------------------------ |
-| Privacy             | Cloud        | Cloud        | **100% local**                 |
-| Cost                | $20/mo       | $20/mo       | **Free**                       |
-| Rate limits         | Yes          | Yes          | **None**                       |
-| Model choice        | GPT-4/o/mini | Claude 3/3.5 | **Any Ollama model**           |
-| Custom GPTs         | Yes          | No           | **Custom Agents**              |
-| File upload         | Yes          | Yes          | **+ local file system**        |
-| Voice input         | Yes          | No           | **Whisper.cpp local**          |
-| Scheduled tasks     | No           | No           | **Cron automation**            |
-| Multi-model         | No           | No           | **Chain/Race/Vote**            |
-| Model router        | Hidden       | Hidden       | **Transparent + configurable** |
-| Offline             | No           | No           | **Yes**                        |
-| Hot-swap models     | No           | No           | **Instant model switching**    |
-| Hardware visibility | None         | None         | **Full system monitoring**     |
+### vs. Cloud (ChatGPT, Claude, Gemini)
 
-**Our unique advantages:**
+| Feature             | ChatGPT      | Claude       | Gemini   | **Local (Ours)**               |
+| ------------------- | ------------ | ------------ | -------- | ------------------------------ |
+| Privacy             | Cloud        | Cloud        | Cloud    | **100% local**                 |
+| Cost                | $20/mo       | $20/mo       | $20/mo   | **Free**                       |
+| Rate limits         | Yes          | Yes          | Yes      | **None**                       |
+| Model choice        | GPT-4/o/mini | Claude 3/3.5 | Gemini 2 | **Any Ollama model**           |
+| Custom GPTs         | Yes          | Projects     | Gems     | **Custom Agents**              |
+| File upload         | Yes          | Yes          | Yes      | **+ local file system**        |
+| Voice input         | Yes          | No           | Yes      | **Whisper.cpp local**          |
+| Scheduled tasks     | No           | No           | No       | **Cron automation**            |
+| Multi-model         | No           | No           | No       | **Chain/Race/Vote**            |
+| Model router        | Hidden       | Hidden       | Hidden   | **Transparent + configurable** |
+| Offline             | No           | No           | No       | **Yes**                        |
+| Hot-swap models     | No           | No           | No       | **Instant model switching**    |
+| Hardware visibility | None         | None         | None     | **Full system monitoring**     |
 
-1. **Transparency** — see exactly which model, how fast, how much RAM
-2. **Orchestration** — chain/race/vote across models (impossible with cloud)
-3. **Automation** — scheduled tasks with local file/command integration
-4. **Zero friction** — no login, no API keys, no rate limits, no costs
-5. **Model diversity** — 20+ models from different families, each with strengths
+### vs. Local Competitors (Open WebUI, LM Studio, Jan.ai)
+
+| Feature           | Open WebUI | LM Studio      | Jan.ai         | **Ours**                        |
+| ----------------- | ---------- | -------------- | -------------- | ------------------------------- |
+| Setup             | Docker     | Standalone app | Standalone app | **npm dev server (instant)**    |
+| Model management  | Good       | Excellent      | Good           | **+ RAM estimation, co-load**   |
+| Custom agents     | Basic      | No             | Assistants     | **Full agent editor + tools**   |
+| Quick actions     | No         | No             | No             | **30 built-in templates**       |
+| Scheduled tasks   | No         | No             | No             | **Cron with shell/file input**  |
+| Multi-model orch. | No         | No             | No             | **Chain/Race/Vote**             |
+| System monitoring | Minimal    | Minimal        | No             | **Full (RAM, disk, GPU, brew)** |
+| Model router      | No         | No             | No             | **Heuristic auto-routing**      |
+| Command palette   | No         | No             | No             | **Cmd+K unified access**        |
+| Developer-focused | General    | General        | General        | **Code-first Quick Actions**    |
+| Extensibility     | Plugins    | No             | Extensions     | **v5 (deferred)**               |
+
+**Our unique position:** Developer-first local AI workspace with the deepest system integration (shell commands, file I/O, cron, hardware monitoring) and the richest multi-model orchestration. We trade breadth (no plugins, no cloud) for depth in the local developer workflow.
+
+**Key differentiators vs. all competitors:**
+
+1. **Orchestration** — chain/race/vote across models (no competitor offers this)
+2. **Automation** — scheduled tasks with local file/command integration
+3. **Transparency** — see exactly which model, how fast, how much RAM, will-it-fit
+4. **Zero friction** — no Docker, no Electron, no API keys, just `npm run dev`
+5. **Code-first** — 30 Quick Actions optimized for developer workflows
 
 ---
 
-## 9. Success Metrics
+## 11. Risks & Mitigations
 
-| Metric                | Target                              | Measurement                         |
-| --------------------- | ----------------------------------- | ----------------------------------- |
-| Conversation creation | >5/day                              | Count in IndexedDB                  |
-| Quick Action usage    | >50% of conversations start from QA | `usageCount` tracking               |
-| Agent adoption        | 3+ agents in regular use            | `conversationCount` per agent       |
-| Auto-routing accuracy | >80% user satisfaction              | Rating on auto-routed conversations |
-| Retention             | Session >10 min                     | Timestamp delta                     |
-| Task completion       | <3 interactions to useful answer    | Message count per conversation      |
+| #   | Risk                                                                          | Impact   | Likelihood | Mitigation                                                                  |
+| --- | ----------------------------------------------------------------------------- | -------- | ---------- | --------------------------------------------------------------------------- |
+| R1  | **IndexedDB data loss** — browser clear, profile reset, or corruption         | High     | Low        | Auto-export backup every 7 days (prompt user). Export includes all data.    |
+| R2  | **Context window overflow** — long conversations silently truncated by Ollama | High     | Medium     | Context bar (3.1), warn at 80%, auto-summarize at 95%, model switch hint.   |
+| R3  | **Shell command injection** — scheduled task exec route exploited             | Critical | Low        | Allowlist of safe executables, `execFile` (not `exec`), localhost-only.     |
+| R4  | **Model not loaded for cron task** — task fails silently                      | Medium   | Medium     | Auto-load before task, 30s timeout for load, log error if fails.            |
+| R5  | **Bundle size bloat** — new deps (idb, cron parser) increase load time        | Low      | Low        | `idb` is ~1KB. Cron parser is ~3KB. No heavy new deps.                      |
+| R6  | **page.tsx complexity** — already 2,500 lines, v4 adds much more              | High     | High       | Phase A includes component decomposition into route groups. Non-negotiable. |
+| R7  | **Browser tab closed during cron** — scheduled tasks don't fire               | Medium   | High       | Clear UX: show "Dashboard must be open" warning. Offer launchd in v4.1.     |
+| R8  | **Stale auto-title** — fast model generates bad title                         | Low      | Medium     | Title is always editable. "Rename" action in sidebar context menu.          |
+| R9  | **Race condition in Race mode** — parallel streams conflict                   | Medium   | Medium     | Each stream writes to independent state. Merge only on completion.          |
+| R10 | **Privacy leak via scheduled command** — command output sent to model         | Medium   | Low        | Show command output to user before sending to model (opt-in auto-send).     |
 
 ---
 
-## 10. Open Questions
+## 12. Success Metrics
 
-1. **Background cron:** Should scheduled tasks require dashboard open, or should we ship a `launchd` daemon?
-2. **Code execution:** Sandboxed code runner is high-value but high-risk. Scope for v5?
-3. **Web search:** Local web search via SearXNG or similar? Or defer to cloud models?
-4. **Persistent memory:** Agent memory across conversations (RAG-style)? Needs vector DB.
-5. **Mobile companion:** Minimal mobile UI that sends to Mac over LAN?
-6. **Plugin system:** Let users write custom tools in JS? Extension API?
+All metrics are local-only (no telemetry). They are computed from IndexedDB queries and displayed on the About page.
+
+| Metric                        | Target                               | How Measured                                      | Why It Matters                            |
+| ----------------------------- | ------------------------------------ | ------------------------------------------------- | ----------------------------------------- |
+| Conversations/week            | >15                                  | Count in IndexedDB by `createdAt`                 | Adoption: are people using conversations? |
+| Quick Action usage            | >40% of conversations start from QA  | `agentId` or first message matches template       | Quick Actions reduce friction             |
+| Agent adoption                | 3+ agents with >5 conversations each | `conversationCount` per agent                     | Agents provide lasting value              |
+| Auto-routing override rate    | <30%                                 | Manual model changes in auto-routed conversations | Router is accurate enough                 |
+| Avg messages per conversation | >4                                   | Message count / conversation count                | Multi-turn engagement                     |
+| Branch usage                  | >10% of conversations have branches  | Messages with `branchIndex > 0`                   | Branching is discoverable                 |
+| Scheduled task success rate   | >90%                                 | `success` in `runHistory`                         | Tasks are reliable                        |
 
 ---
 
-## Appendix A: Quick Action Templates (Full Detail)
+## 13. Open Questions
 
-### Code Review
+| #   | Question                                                                                   | Options                                                  | Recommendation                                                                                            |
+| --- | ------------------------------------------------------------------------------------------ | -------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- |
+| Q1  | **Background cron:** Dashboard open vs. launchd daemon?                                    | (a) Browser-only for v4, (b) Ship launchd from day 1     | **(a)** Browser-only for v4. Add launchd in v4.1 after validating demand.                                 |
+| Q2  | **Code execution:** Sandboxed runner for model-generated code?                             | (a) Defer to v5, (b) Simple eval with timeout            | **(a)** Defer. Security implications are high.                                                            |
+| Q3  | **Web search:** Local search via SearXNG?                                                  | (a) Defer, (b) Optional SearXNG integration              | **(a)** Defer. Requires an external service, which conflicts with the "zero-deps" philosophy.             |
+| Q4  | **Persistent memory:** Agent memory across conversations?                                  | (a) Defer (needs vector DB), (b) Simple key-value memory | **(b)** Consider simple key-value "agent notes" in v4 as lightweight alternative to RAG.                  |
+| Q5  | **Mobile companion:** LAN-based access from phone?                                         | (a) Defer, (b) Expose on LAN with `--hostname 0.0.0.0`   | **(a)** Defer. Security concerns with LAN exposure.                                                       |
+| Q6  | **Plugin system:** User-written tools?                                                     | (a) Defer, (b) Simple JS extension API                   | **(a)** Defer. Need to stabilize core features first.                                                     |
+| Q7  | **Conversation sharing:** Export as shareable HTML?                                        | (a) Markdown only, (b) Self-contained HTML               | **(b)** Nice-to-have. Self-contained HTML with embedded CSS = easy sharing. Consider for v4.1.            |
+| Q8  | **Model fine-tuning triggers:** If user rates many responses, offer to create a Modelfile? | (a) Defer, (b) Suggest Modelfile adjustments             | **(b)** Low-effort: if a model gets 10+ ratings, suggest temperature/system prompt tweaks as a new Agent. |
+
+---
+
+## 14. New Dependencies
+
+| Package                    | Size (gzip)  | Purpose                              | Phase |
+| -------------------------- | ------------ | ------------------------------------ | ----- |
+| `idb`                      | ~1KB         | Thin IndexedDB wrapper with Promises | A     |
+| `fuse.js`                  | ~6KB         | Fuzzy search for command palette     | B     |
+| `cron-parser`              | ~3KB         | Parse and validate cron expressions  | F     |
+| `react-markdown`           | ~12KB        | Already installed (Phase 5)          | —     |
+| `react-syntax-highlighter` | ~40KB (lazy) | Already installed (Phase 5)          | —     |
+
+**Total new bundle cost:** ~10KB gzipped. Minimal impact.
+
+---
+
+## Appendix A: Quick Action Templates (Selected Examples)
+
+Full templates for 4 representative actions. The remaining 26 follow the same YAML structure and are defined in `src/app/lib/quick-actions.ts`.
+
+### Code Review (Code category)
 
 ```yaml
 name: Code Review
@@ -608,13 +1073,14 @@ systemPrompt: |
   readability, and maintainability.
   Always reference specific lines. Suggest concrete fixes, not just problems.
   Rate severity: critical, warning, or suggestion.
+  Format output as a numbered list of findings.
 userTemplate: |
   Review this code:
 
   {paste or attach file}
 ```
 
-### Debug Error
+### Debug Error (Code category)
 
 ```yaml
 name: Debug Error
@@ -638,7 +1104,7 @@ userTemplate: |
   {paste relevant code}
 ```
 
-### Deep Think
+### Deep Think (Analysis category)
 
 ```yaml
 name: Deep Think
@@ -656,7 +1122,7 @@ userTemplate: |
   {describe the problem or question}
 ```
 
-### Shell Command
+### Shell Command (DevOps category)
 
 ```yaml
 name: Shell Command
@@ -677,4 +1143,23 @@ userTemplate: |
 
 ---
 
+## Appendix B: Error Handling & Edge Cases
+
+| Scenario                                 | Behavior                                                                                                              |
+| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
+| No models installed                      | Show "Install a model" CTA linking to Ollama library. Disable all chat features.                                      |
+| Ollama not running                       | Show offline banner (existing). Conversations still browsable but sending disabled.                                   |
+| Model unloaded mid-conversation          | Detect 404 on stream, auto-reload model, retry once. Show toast: "Model reloaded".                                    |
+| IndexedDB full                           | Show warning toast with storage usage. Suggest archiving old conversations.                                           |
+| IndexedDB unavailable (private browsing) | Fall back to localStorage with 50-conversation cap. Show banner: "Limited storage mode".                              |
+| Voice recording fails                    | Show error: "Microphone access denied" or "Recording failed". Fall back to text.                                      |
+| Whisper not installed                    | Voice button disabled with tooltip: "Install whisper-cpp via Homebrew".                                               |
+| Cron task model not installed            | Task fails, logged to run history: "Model not found: xyz". Notification shown.                                        |
+| Chain orchestration step fails           | Abort chain, show partial results with error at failed step.                                                          |
+| Race mode — one model fails              | Show results from successful models. Failed model shows error card.                                                   |
+| Attachment too large                     | Reject with toast: "File exceeds 10MB limit". Don't send.                                                             |
+| Context window exceeded                  | Warning at 80%, auto-summarize at 95%, hard-fail message: "Context full — start new conversation or compact history". |
+
+---
+
 _This PRD is a living document. Update as features are implemented and user feedback arrives._