feat(platform-service): add shared knowledge foundation

This commit is contained in:
root 2026-03-15 09:33:17 +00:00
parent 728d8f2484
commit 66d0bf53a9
7 changed files with 654 additions and 0 deletions

View File

@ -83,6 +83,10 @@ const CONTAINER_DEFS: Record<string, ContainerConfig> = {
ai_budget_policies: { partitionKeyPath: '/productId' },
ai_budget_spend_entries: { partitionKeyPath: '/productId', defaultTtl: 30 * 86400 },
ai_budget_alerts: { partitionKeyPath: '/productId', defaultTtl: 30 * 86400 },
// Shared knowledge / retrieval catalog
knowledge_bases: { partitionKeyPath: '/productId' },
knowledge_sources: { partitionKeyPath: '/knowledgeBaseId' },
knowledge_chunks: { partitionKeyPath: '/knowledgeBaseId', defaultTtl: 90 * 86400 },
// Telemetry (client diagnostics — see docs/WINDSURF/CLIENT_TELEMETRY_DESIGN.md)
telemetry_events: { partitionKeyPath: '/pk', defaultTtl: 30 * 86400 },
telemetry_error_clusters: { partitionKeyPath: '/pk', defaultTtl: 90 * 86400 },

View File

@ -0,0 +1,65 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { MemoryDatastoreProvider } from '@bytelyst/datastore';
import { _resetDatastoreProvider, setProvider } from '../../lib/datastore.js';
import * as repo from './repository.js';
describe('knowledge repository', () => {
beforeEach(() => {
setProvider(new MemoryDatastoreProvider());
});
afterEach(() => {
_resetDatastoreProvider();
});
it('stores knowledge bases, sources, and chunks', async () => {
await repo.createBase({
id: 'kb_1',
productId: 'lysnrai',
name: 'Support KB',
status: 'draft',
retrievalStrategy: 'hybrid',
tags: ['support'],
createdBy: 'admin_1',
createdAt: '2026-03-15T00:00:00.000Z',
updatedAt: '2026-03-15T00:00:00.000Z',
});
await repo.createSource({
id: 'ksrc_1',
knowledgeBaseId: 'kb_1',
productId: 'lysnrai',
type: 'url',
status: 'pending',
title: 'Runbook',
uri: 'https://example.com/runbook',
tags: ['runbook'],
createdAt: '2026-03-15T00:00:00.000Z',
updatedAt: '2026-03-15T00:00:00.000Z',
});
await repo.upsertChunks([
{
id: 'kb_1:ksrc_1:0',
knowledgeBaseId: 'kb_1',
sourceId: 'ksrc_1',
productId: 'lysnrai',
ordinal: 0,
contentText: 'Restart the worker before retrying the incident pipeline.',
tokenCount: 11,
embeddingStatus: 'ready',
citations: ['https://example.com/runbook#restart'],
tags: ['worker', 'incident'],
createdAt: '2026-03-15T00:00:00.000Z',
},
]);
const bases = await repo.listBases('lysnrai', { limit: 20 });
const sources = await repo.listSources('kb_1');
const chunks = await repo.listChunks('kb_1');
expect(bases).toHaveLength(1);
expect(sources[0].title).toBe('Runbook');
expect(chunks[0].embeddingStatus).toBe('ready');
});
});

View File

@ -0,0 +1,107 @@
import { NotFoundError } from '../../lib/errors.js';
import { getCollection } from '../../lib/datastore.js';
import type {
KnowledgeBaseDoc,
KnowledgeChunkDoc,
KnowledgeSourceDoc,
ListKnowledgeBasesQuery,
} from './types.js';
function baseCollection() {
return getCollection<KnowledgeBaseDoc>('knowledge_bases', '/productId');
}
function sourceCollection() {
return getCollection<KnowledgeSourceDoc>('knowledge_sources', '/knowledgeBaseId');
}
function chunkCollection() {
return getCollection<KnowledgeChunkDoc>('knowledge_chunks', '/knowledgeBaseId');
}
export async function createBase(doc: KnowledgeBaseDoc): Promise<KnowledgeBaseDoc> {
return baseCollection().create(doc);
}
export async function listBases(
productId: string,
query: ListKnowledgeBasesQuery
): Promise<KnowledgeBaseDoc[]> {
return baseCollection().findMany({
filter: {
productId,
...(query.status ? { status: query.status } : {}),
},
sort: { createdAt: -1 },
limit: query.limit,
});
}
export async function getBase(id: string, productId: string): Promise<KnowledgeBaseDoc> {
const base = await baseCollection().findById(id, productId);
if (!base) throw new NotFoundError(`Knowledge base '${id}' not found`);
return base;
}
export async function updateBase(
id: string,
productId: string,
updates: Partial<KnowledgeBaseDoc>
): Promise<KnowledgeBaseDoc> {
const updated = await baseCollection().update(id, productId, {
...updates,
updatedAt: new Date().toISOString(),
});
if (!updated) throw new NotFoundError(`Knowledge base '${id}' not found`);
return updated;
}
export async function createSource(doc: KnowledgeSourceDoc): Promise<KnowledgeSourceDoc> {
return sourceCollection().create(doc);
}
export async function getSource(id: string, knowledgeBaseId: string): Promise<KnowledgeSourceDoc> {
const source = await sourceCollection().findById(id, knowledgeBaseId);
if (!source) throw new NotFoundError(`Knowledge source '${id}' not found`);
return source;
}
export async function updateSource(
id: string,
knowledgeBaseId: string,
updates: Partial<KnowledgeSourceDoc>
): Promise<KnowledgeSourceDoc> {
const updated = await sourceCollection().update(id, knowledgeBaseId, {
...updates,
updatedAt: new Date().toISOString(),
});
if (!updated) throw new NotFoundError(`Knowledge source '${id}' not found`);
return updated;
}
export async function listSources(knowledgeBaseId: string): Promise<KnowledgeSourceDoc[]> {
return sourceCollection().findMany({
filter: { knowledgeBaseId },
sort: { createdAt: -1 },
limit: 200,
});
}
export async function upsertChunks(docs: KnowledgeChunkDoc[]): Promise<KnowledgeChunkDoc[]> {
const writes = docs.map(doc => chunkCollection().upsert(doc));
return Promise.all(writes);
}
export async function listChunks(
knowledgeBaseId: string,
filter: { sourceId?: string; limit?: number } = {}
): Promise<KnowledgeChunkDoc[]> {
return chunkCollection().findMany({
filter: {
knowledgeBaseId,
...(filter.sourceId ? { sourceId: filter.sourceId } : {}),
},
sort: { ordinal: 1 },
limit: filter.limit ?? 500,
});
}

View File

@ -0,0 +1,134 @@
import Fastify from 'fastify';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
const repoMock = {
listBases: vi.fn(),
createBase: vi.fn(),
getBase: vi.fn(),
updateBase: vi.fn(),
listSources: vi.fn(),
createSource: vi.fn(),
getSource: vi.fn(),
updateSource: vi.fn(),
upsertChunks: vi.fn(),
listChunks: vi.fn(),
};
vi.mock('./repository.js', () => repoMock);
async function buildApp(payload?: { sub: string; productId: string; role?: string }) {
const { knowledgeRoutes } = await import('./routes.js');
const app = Fastify({ logger: false });
if (payload) {
app.addHook('onRequest', async req => {
req.jwtPayload = payload;
});
}
await app.register(knowledgeRoutes, { prefix: '/api' });
return app;
}
describe('knowledgeRoutes', () => {
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(() => {
vi.restoreAllMocks();
});
it('POST /knowledge/bases creates a knowledge base', async () => {
repoMock.createBase.mockResolvedValue({ id: 'kb_1', name: 'Support KB' });
const app = await buildApp({ sub: 'admin_1', productId: 'lysnrai', role: 'admin' });
const res = await app.inject({
method: 'POST',
url: '/api/knowledge/bases',
payload: {
name: 'Support KB',
retrievalStrategy: 'hybrid',
},
});
expect(res.statusCode).toBe(200);
expect(repoMock.createBase).toHaveBeenCalledWith(
expect.objectContaining({
name: 'Support KB',
retrievalStrategy: 'hybrid',
})
);
});
it('POST /knowledge/sources/:id/chunks stores retrieval-ready chunk manifests', async () => {
repoMock.getSource.mockResolvedValue({
id: 'ksrc_1',
knowledgeBaseId: 'kb_1',
productId: 'lysnrai',
});
repoMock.upsertChunks.mockResolvedValue([{ id: 'kb_1:ksrc_1:0' }]);
const app = await buildApp({ sub: 'admin_1', productId: 'lysnrai', role: 'admin' });
const res = await app.inject({
method: 'POST',
url: '/api/knowledge/sources/ksrc_1/chunks?knowledgeBaseId=kb_1',
payload: {
sourceStatus: 'indexed',
chunks: [
{
ordinal: 0,
contentText: 'Restart the worker before retrying the incident pipeline.',
embeddingStatus: 'ready',
tags: ['incident'],
},
],
},
});
expect(res.statusCode).toBe(200);
expect(repoMock.upsertChunks).toHaveBeenCalledWith([
expect.objectContaining({
knowledgeBaseId: 'kb_1',
sourceId: 'ksrc_1',
embeddingStatus: 'ready',
}),
]);
expect(repoMock.updateSource).toHaveBeenCalledWith('ksrc_1', 'kb_1', { status: 'indexed' });
});
it('POST /knowledge/bases/:id/retrieval-query returns scored matches', async () => {
repoMock.listChunks.mockResolvedValue([
{
id: 'chunk_1',
sourceId: 'ksrc_1',
ordinal: 0,
contentText: 'Restart the worker before retrying the incident pipeline.',
citations: ['https://example.com/runbook#restart'],
tags: ['worker', 'incident'],
},
{
id: 'chunk_2',
sourceId: 'ksrc_1',
ordinal: 1,
contentText: 'Billing retries are handled separately.',
citations: [],
tags: ['billing'],
},
]);
const app = await buildApp({ sub: 'admin_1', productId: 'lysnrai', role: 'admin' });
const res = await app.inject({
method: 'POST',
url: '/api/knowledge/bases/kb_1/retrieval-query',
payload: {
query: 'incident worker',
limit: 5,
},
});
expect(res.statusCode).toBe(200);
expect(res.json().matches[0]).toMatchObject({
chunkId: 'chunk_1',
sourceId: 'ksrc_1',
});
});
});

View File

@ -0,0 +1,212 @@
import { randomUUID } from 'node:crypto';
import type { FastifyInstance } from 'fastify';
import { BadRequestError, ForbiddenError } from '../../lib/errors.js';
import {
CreateKnowledgeBaseSchema,
CreateKnowledgeSourceSchema,
KnowledgeBaseDoc,
KnowledgeChunkDoc,
KnowledgeSourceDoc,
ListKnowledgeBasesQuerySchema,
RetrievalQuerySchema,
UpdateKnowledgeBaseSchema,
UpsertKnowledgeChunksSchema,
} from './types.js';
import * as repo from './repository.js';
function requireAdmin(req: { jwtPayload?: { sub?: string; role?: string; productId?: string } }): {
userId: string;
productId: string;
} {
const payload = req.jwtPayload;
if (!payload?.sub) throw new ForbiddenError('Authentication required');
if (!payload.role || !['super_admin', 'admin'].includes(payload.role)) {
throw new ForbiddenError('Admin access required');
}
return {
userId: payload.sub,
productId: payload.productId ?? process.env.DEFAULT_PRODUCT_ID ?? 'lysnrai',
};
}
function validationError(message: string): never {
throw new BadRequestError(message);
}
function scoreChunk(query: string, chunk: KnowledgeChunkDoc): number {
const q = query.toLowerCase();
const text = chunk.contentText.toLowerCase();
if (text.includes(q)) return 3;
const words = q.split(/\s+/).filter(Boolean);
let score = 0;
for (const word of words) {
if (text.includes(word)) score += 1;
if (chunk.tags.some(tag => tag.toLowerCase() === word)) score += 2;
}
return score;
}
export async function knowledgeRoutes(app: FastifyInstance) {
app.get('/knowledge/bases', async req => {
const access = requireAdmin(req);
const parsed = ListKnowledgeBasesQuerySchema.safeParse(req.query);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
return repo.listBases(access.productId, parsed.data);
});
app.post('/knowledge/bases', async req => {
const access = requireAdmin(req);
const parsed = CreateKnowledgeBaseSchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
const now = new Date().toISOString();
const doc: KnowledgeBaseDoc = {
id: `kb_${randomUUID()}`,
productId: access.productId,
orgId: parsed.data.orgId,
workspaceId: parsed.data.workspaceId,
name: parsed.data.name,
description: parsed.data.description,
status: 'draft',
embeddingModel: parsed.data.embeddingModel,
retrievalStrategy: parsed.data.retrievalStrategy,
tags: parsed.data.tags,
metadata: parsed.data.metadata,
createdBy: access.userId,
createdAt: now,
updatedAt: now,
};
return repo.createBase(doc);
});
app.get('/knowledge/bases/:id', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
return repo.getBase(id, access.productId);
});
app.patch('/knowledge/bases/:id', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
const parsed = UpdateKnowledgeBaseSchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
return repo.updateBase(id, access.productId, parsed.data);
});
app.get('/knowledge/bases/:id/sources', async req => {
requireAdmin(req);
const { id } = req.params as { id: string };
return repo.listSources(id);
});
app.post('/knowledge/bases/:id/sources', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
await repo.getBase(id, access.productId);
const parsed = CreateKnowledgeSourceSchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
const now = new Date().toISOString();
const doc: KnowledgeSourceDoc = {
id: `ksrc_${randomUUID()}`,
knowledgeBaseId: id,
productId: access.productId,
type: parsed.data.type,
status: 'pending',
title: parsed.data.title,
uri: parsed.data.uri,
extractionJobId: parsed.data.extractionJobId,
contentHash: parsed.data.contentHash,
tags: parsed.data.tags,
metadata: parsed.data.metadata,
createdAt: now,
updatedAt: now,
};
return repo.createSource(doc);
});
app.get('/knowledge/bases/:id/chunks', async req => {
requireAdmin(req);
const { id } = req.params as { id: string };
const { sourceId, limit } = req.query as { sourceId?: string; limit?: string };
return repo.listChunks(id, {
sourceId,
limit: limit ? Number(limit) : undefined,
});
});
app.post('/knowledge/sources/:id/chunks', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
const { knowledgeBaseId } = req.query as { knowledgeBaseId: string };
const source = await repo.getSource(id, knowledgeBaseId);
if (source.productId !== access.productId) {
throw new ForbiddenError('Knowledge source is not in the current product');
}
const parsed = UpsertKnowledgeChunksSchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
const now = new Date().toISOString();
const chunks: KnowledgeChunkDoc[] = parsed.data.chunks.map(chunk => ({
id: `${knowledgeBaseId}:${id}:${chunk.ordinal}`,
knowledgeBaseId,
sourceId: id,
productId: access.productId,
ordinal: chunk.ordinal,
contentText: chunk.contentText,
tokenCount: chunk.tokenCount,
embeddingStatus: chunk.embeddingStatus,
citations: chunk.citations,
tags: chunk.tags,
metadata: chunk.metadata,
createdAt: now,
}));
const saved = await repo.upsertChunks(chunks);
if (parsed.data.sourceStatus) {
await repo.updateSource(id, knowledgeBaseId, { status: parsed.data.sourceStatus });
}
return saved;
});
app.post('/knowledge/bases/:id/retrieval-query', async req => {
requireAdmin(req);
const { id } = req.params as { id: string };
const parsed = RetrievalQuerySchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
const chunks = await repo.listChunks(id, {
sourceId: parsed.data.sourceId,
limit: 1000,
});
const matches = chunks
.map(chunk => ({
chunk,
score: scoreChunk(parsed.data.query, chunk),
}))
.filter(item => item.score > 0)
.sort((a, b) => b.score - a.score || a.chunk.ordinal - b.chunk.ordinal)
.slice(0, parsed.data.limit);
return {
query: parsed.data.query,
matches: matches.map(item => ({
chunkId: item.chunk.id,
sourceId: item.chunk.sourceId,
score: item.score,
preview: item.chunk.contentText.slice(0, 240),
citations: item.chunk.citations,
tags: item.chunk.tags,
})),
};
});
}

View File

@ -0,0 +1,130 @@
import { z } from 'zod';
export const KnowledgeBaseStatusSchema = z.enum(['draft', 'active', 'archived']);
export const KnowledgeSourceTypeSchema = z.enum(['url', 'file', 'manual', 'dataset', 'api']);
export const KnowledgeSourceStatusSchema = z.enum(['pending', 'indexed', 'failed', 'archived']);
export const KnowledgeChunkStatusSchema = z.enum(['pending', 'ready', 'archived']);
export const KnowledgeBaseSchema = z.object({
id: z.string().min(1),
productId: z.string().min(1),
orgId: z.string().optional(),
workspaceId: z.string().optional(),
name: z.string().min(1),
description: z.string().optional(),
status: KnowledgeBaseStatusSchema,
embeddingModel: z.string().optional(),
retrievalStrategy: z.string().default('hybrid'),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
createdBy: z.string().min(1),
createdAt: z.string(),
updatedAt: z.string(),
});
export type KnowledgeBaseDoc = z.infer<typeof KnowledgeBaseSchema> & {
_ts?: number;
_etag?: string;
};
export const KnowledgeSourceSchema = z.object({
id: z.string().min(1),
knowledgeBaseId: z.string().min(1),
productId: z.string().min(1),
type: KnowledgeSourceTypeSchema,
status: KnowledgeSourceStatusSchema,
title: z.string().min(1),
uri: z.string().min(1).optional(),
extractionJobId: z.string().optional(),
contentHash: z.string().optional(),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
createdAt: z.string(),
updatedAt: z.string(),
});
export type KnowledgeSourceDoc = z.infer<typeof KnowledgeSourceSchema> & {
_ts?: number;
_etag?: string;
};
export const KnowledgeChunkSchema = z.object({
id: z.string().min(1),
knowledgeBaseId: z.string().min(1),
sourceId: z.string().min(1),
productId: z.string().min(1),
ordinal: z.number().int().min(0),
contentText: z.string().min(1),
tokenCount: z.number().int().min(0).default(0),
embeddingStatus: KnowledgeChunkStatusSchema,
citations: z.array(z.string()).default([]),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
createdAt: z.string(),
});
export type KnowledgeChunkDoc = z.infer<typeof KnowledgeChunkSchema> & {
_ts?: number;
_etag?: string;
};
export const CreateKnowledgeBaseSchema = z.object({
orgId: z.string().optional(),
workspaceId: z.string().optional(),
name: z.string().min(1),
description: z.string().optional(),
embeddingModel: z.string().optional(),
retrievalStrategy: z.string().default('hybrid'),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
});
export const UpdateKnowledgeBaseSchema = z.object({
name: z.string().min(1).optional(),
description: z.string().optional(),
status: KnowledgeBaseStatusSchema.optional(),
embeddingModel: z.string().optional(),
retrievalStrategy: z.string().optional(),
tags: z.array(z.string()).optional(),
metadata: z.record(z.unknown()).optional(),
});
export const CreateKnowledgeSourceSchema = z.object({
type: KnowledgeSourceTypeSchema,
title: z.string().min(1),
uri: z.string().min(1).optional(),
extractionJobId: z.string().optional(),
contentHash: z.string().optional(),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
});
export const UpsertKnowledgeChunksSchema = z.object({
sourceStatus: KnowledgeSourceStatusSchema.optional(),
chunks: z
.array(
z.object({
ordinal: z.number().int().min(0),
contentText: z.string().min(1),
tokenCount: z.number().int().min(0).default(0),
embeddingStatus: KnowledgeChunkStatusSchema.default('pending'),
citations: z.array(z.string()).default([]),
tags: z.array(z.string()).default([]),
metadata: z.record(z.unknown()).optional(),
})
)
.min(1),
});
export const ListKnowledgeBasesQuerySchema = z.object({
status: KnowledgeBaseStatusSchema.optional(),
limit: z.coerce.number().min(1).max(100).default(20),
});
export const RetrievalQuerySchema = z.object({
query: z.string().min(1),
limit: z.coerce.number().min(1).max(50).default(10),
sourceId: z.string().optional(),
});
export type ListKnowledgeBasesQuery = z.infer<typeof ListKnowledgeBasesQuerySchema>;

View File

@ -38,6 +38,7 @@ import { auditRoutes } from './modules/audit/routes.js';
import { agentRoutes } from './modules/agents/routes.js';
import { agentEvalRoutes } from './modules/agent-evals/routes.js';
import { aiBudgetRoutes } from './modules/ai-budgets/routes.js';
import { knowledgeRoutes } from './modules/knowledge/routes.js';
import { notificationRoutes } from './modules/notifications/routes.js';
import { flagRoutes } from './modules/flags/routes.js';
import { rateLimitRoutes } from './modules/ratelimit/routes.js';
@ -145,6 +146,7 @@ await app.register(auditRoutes, { prefix: '/api' });
await app.register(agentRoutes, { prefix: '/api' });
await app.register(agentEvalRoutes, { prefix: '/api' });
await app.register(aiBudgetRoutes, { prefix: '/api' });
await app.register(knowledgeRoutes, { prefix: '/api' });
await app.register(notificationRoutes, { prefix: '/api' });
await app.register(flagRoutes, { prefix: '/api' });
await app.register(rateLimitRoutes, { prefix: '/api' });