fix(backend): harden LLM error handling — retry, timeout, missing key guards

2026-04-06 11:09:08 -07:00 · 2026-04-06 11:09:08 -07:00 · c71b01681f
commit c71b01681f
parent b8bc096adb
3 changed files with 89 additions and 37 deletions
--- a/backend/src/lib/copilot-transform.ts
+++ b/backend/src/lib/copilot-transform.ts
@ -6,6 +6,13 @@

 import { llm } from './llm.js';

+/** Settles like `promise`, or rejects with 'LLM request timed out' after `ms` ms; the timer is cleared once the race settles. */
+function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, reject) => { timer = setTimeout(() => reject(new Error('LLM request timed out')), ms); });
+  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
+}
+
 export type CopilotAction = 'shorten' | 'expand' | 'bulletize' | 'grammar' | 'fix-rewrite' | 'change-tone' | 'continue' | 'explain';

 const SYSTEM_PROMPTS: Record<CopilotAction, string> = {
@ -51,19 +58,30 @@ export async function runCopilotTransform(action: CopilotAction, text: string):
    return fallbackTransform(action, text);
  }

-  try {
-    const result = await provider.chatCompletion({
-      messages: [
-        { role: 'system', content: SYSTEM_PROMPTS[action] },
-        { role: 'user', content: text },
-      ],
-      temperature: 0.3,
-      maxTokens: 4096,
-    });
-    const out = result.content.trim();
-    if (out.length > 0) return out;
-  } catch {
-    // fall through to local heuristics
+  const maxRetries = 3;
+  const baseDelayMs = 1000;
+
+  for (let attempt = 0; attempt < maxRetries; attempt++) {
+    try {
+      const result = await withTimeout(provider.chatCompletion({
+        messages: [
+          { role: 'system', content: SYSTEM_PROMPTS[action] },
+          { role: 'user', content: text },
+        ],
+        temperature: 0.3,
+        maxTokens: 4096,
+      }), 60_000);
+      const out = result.content.trim();
+      if (out.length > 0) return out;
+      break; // empty response — fall through to heuristic
+    } catch (err: unknown) {
+      const isRateLimit = err instanceof Error && (err.message.includes('429') || err.message.includes('rate'));
+      if (isRateLimit && attempt < maxRetries - 1) {
+        await new Promise((r) => setTimeout(r, baseDelayMs * Math.pow(2, attempt)));
+        continue;
+      }
+      break; // not retriable, or rate-limit retries exhausted — fall through to heuristic
+    }
  }
  return fallbackTransform(action, text);
 }
@ -76,18 +94,18 @@ export async function suggestTitleFromBody(body: string): Promise<string> {
  }

  try {
-    const result = await provider.chatCompletion({
+    const result = await withTimeout(provider.chatCompletion({
      messages: [
        { role: 'system', content: 'Suggest a concise, descriptive title (max 8 words). Return only the title, no quotes.' },
        { role: 'user', content: plain.slice(0, 4000) },
      ],
      temperature: 0.6,
      maxTokens: 64,
-    });
+    }), 60_000);
    const t = result.content.trim();
    if (t.length > 0 && t.length < 500) return t;
  } catch {
-    // fall through
+    // timeout or LLM error — fall through to heuristic
  }
  return plain.split(/[.!?]/)[0]?.trim().slice(0, 80) || 'Untitled note';
 }
--- a/backend/src/modules/note-prompts/runner.ts
+++ b/backend/src/modules/note-prompts/runner.ts
@ -11,6 +11,13 @@ import {
 } from '@bytelyst/llm';
 import type { PromptTemplateDoc, RunPromptInput, RunPromptOutput } from './types.js';

+/** Settles like `promise`, or rejects with 'LLM request timed out' after `ms` ms; the timer is cleared once the race settles. */
+function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, reject) => { timer = setTimeout(() => reject(new Error('LLM request timed out')), ms); });
+  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
+}
+
 /**
 * Interpolate {{variable}} placeholders in a template string.
 */
@ -29,6 +36,10 @@ export async function executePrompt(
 ): Promise<RunPromptOutput> {
  const provider = llm();

+  if (!provider.isConfigured()) {
+    throw new Error('LLM provider is not configured. Set LLM_PROVIDER and the required API key.');
+  }
+
  // Build variables map
  const vars: Record<string, string> = {
    ...input.variables,
@ -58,27 +69,49 @@ export async function executePrompt(
    model = config.LLM_VISION_MODEL;
  }

-  const result = await provider.chatCompletion({
-    messages,
-    model,
-    temperature: template.temperature ?? 0.7,
-    maxTokens: template.maxTokens ?? 4096,
-  });
+  const maxRetries = 3;
+  const baseDelayMs = 1000;
+  let lastError: unknown;

-  const output: RunPromptOutput = {
-    content: result.content,
-    model: result.model,
-    usage: result.usage,
-    templateSlug: template.slug,
-    outputType: template.outputType,
-  };
+  for (let attempt = 0; attempt < maxRetries; attempt++) {
+    try {
+      const result = await withTimeout(provider.chatCompletion({
+        messages,
+        model,
+        temperature: template.temperature ?? 0.7,
+        maxTokens: template.maxTokens ?? 4096,
+      }), 60_000);

-  // F27: Approval-gated actions — produce proposed state instead of applied
-  if (template.requiresApproval) {
-    output.approvalState = 'proposed';
-  } else {
-    output.approvalState = 'applied';
+      if (!result.content || result.content.trim().length === 0) {
+        throw new Error('LLM returned empty response');
+      }
+
+      const output: RunPromptOutput = {
+        content: result.content,
+        model: result.model,
+        usage: result.usage,
+        templateSlug: template.slug,
+        outputType: template.outputType,
+      };
+
+      // F27: Approval-gated actions — produce proposed state instead of applied
+      if (template.requiresApproval) {
+        output.approvalState = 'proposed';
+      } else {
+        output.approvalState = 'applied';
+      }
+
+      return output;
+    } catch (err: unknown) {
+      lastError = err;
+      const isRateLimit = err instanceof Error && (err.message.includes('429') || err.message.includes('rate'));
+      if (isRateLimit && attempt < maxRetries - 1) {
+        await new Promise((r) => setTimeout(r, baseDelayMs * Math.pow(2, attempt)));
+        continue;
+      }
+      break;
+    }
  }

-  return output;
+  throw lastError instanceof Error ? lastError : new Error('LLM call failed after retries');
 }
--- a/backend/src/modules/note-prompts/scheduler.ts
+++ b/backend/src/modules/note-prompts/scheduler.ts
@ -212,8 +212,9 @@ export async function runSchedulerTick(): Promise<number> {
      });

      ran++;
-    } catch {
-      // Log but don't break the loop
+    } catch (err: unknown) {
+      const msg = err instanceof Error ? err.message : 'Unknown scheduler error';
+      process.stderr.write(`[scheduler] Failed to run schedule ${schedule.id}: ${msg}\n`);
    }
  }