feat(backend): wire Ollama LLM for context messages (TODO-005)

Dual-path LLM enrichment for AI context prep messages. Sources are tried in order:

1. extraction-service (if EXTRACTION_SERVICE_URL is set)
2. Ollama direct (if OLLAMA_URL is set) — non-streaming /api/generate
3. Keyword rules fallback
4. Generic fallback

New env vars: OLLAMA_URL, OLLAMA_MODEL (default: gemma3:4b).
Both LLM paths use a 5s timeout and the null-return-on-error pattern.
Feature-gated behind the ai_context_messages.enabled flag.
2026-04-13 17:00:24 -07:00 · 2026-04-13 17:00:24 -07:00 · 229ce4f00f
commit 229ce4f00f
parent 0e7c1aeb15
2 changed files with 35 additions and 16 deletions
--- a/backend/src/lib/ai-context.ts
+++ b/backend/src/lib/ai-context.ts
@@ -56,25 +56,13 @@ export interface ContextMessageResult {
  source: 'llm' | 'keyword' | 'generic';
 }

-// ── LLM enrichment (extraction-service or ollama-client) ──
+// ── LLM enrichment (dual path: extraction-service or Ollama) ──

-// TODO-005: Wire real LLM enrichment for context messages
-// Priority: high | Phase: A.4
-// Replace the stub below with a real LLM call. Two options:
-//   Option A: @bytelyst/extraction client — POST to extraction-service /api/extract
-//     with task='timer-context'. Requires creating a new extraction task type in
-//     learning_ai_common_plat/services/extraction-service/src/modules/extract/.
-//   Option B: @bytelyst/ollama-client — call Ollama directly with buildPrompt().
-//     Simpler, no extraction-service dependency, but no task abstraction.
-// The prompt is already built by buildPrompt() below. Just replace the fetch stub
-// with a real client call. Keep the 5s timeout and null-return-on-error pattern.
-async function llmEnrich(input: ContextMessageInput): Promise<string | null> {
-  // Only attempt if extraction service URL is configured
+async function llmViaExtraction(prompt: string): Promise<string | null> {
  const extractionUrl = config.EXTRACTION_SERVICE_URL;
  if (!extractionUrl) return null;

  try {
-    const prompt = buildPrompt(input);
    const res = await fetch(`${extractionUrl}/api/extract`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
@@ -85,9 +73,7 @@ async function llmEnrich(input: ContextMessageInput): Promise<string | null> {
      }),
      signal: AbortSignal.timeout(5_000),
    });
-
    if (!res.ok) return null;
-
    const data = await res.json() as { result?: string };
    return data.result ?? null;
  } catch {
@@ -95,6 +81,37 @@ async function llmEnrich(input: ContextMessageInput): Promise<string | null> {
  }
 }

+async function llmViaOllama(prompt: string): Promise<string | null> {
+  const ollamaUrl = config.OLLAMA_URL;
+  if (!ollamaUrl) return null;
+
+  try {
+    const res = await fetch(`${ollamaUrl}/api/generate`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: config.OLLAMA_MODEL,
+        prompt,
+        stream: false,
+        options: { num_predict: 80, temperature: 0.7 },
+      }),
+      signal: AbortSignal.timeout(5_000),
+    });
+    if (!res.ok) return null;
+    const data = await res.json() as { response?: string };
+    const text = data.response?.trim();
+    return text && text.length > 5 ? text : null;
+  } catch {
+    return null;
+  }
+}
+
+async function llmEnrich(input: ContextMessageInput): Promise<string | null> {
+  const prompt = buildPrompt(input);
+  // Try extraction-service first, then Ollama
+  return await llmViaExtraction(prompt) ?? await llmViaOllama(prompt);
+}
+
 function buildPrompt(input: ContextMessageInput): string {
  const parts = [
    `Timer: "${input.timerLabel}" fires in ${input.minutesBefore} minutes.`,
--- a/backend/src/lib/config.ts
+++ b/backend/src/lib/config.ts
@@ -15,6 +15,8 @@ const envSchema = baseBackendConfigSchema.extend({
  FIELD_ENCRYPT_MEK_NAME: z.string().default('chronomind-mek'),
  AZURE_KEYVAULT_URL: z.string().optional(),
  EXTRACTION_SERVICE_URL: z.string().optional(),
+  OLLAMA_URL: z.string().optional(),
+  OLLAMA_MODEL: z.string().default('gemma3:4b'),
 });

 export const config = envSchema.parse(process.env);