feat(knowledge): deepen Knowledge/RAG — search, stats, delete endpoints

- repository.ts: add searchChunks (term-based text search with scoring),
  getBaseStats (source/chunk/token counts), deleteSource, deleteBase
- routes.ts: 4 new endpoints — GET /knowledge/bases/:id/stats,
  DELETE /knowledge/bases/:id (draft only), DELETE /knowledge/bases/:id/sources/:sourceId,
  POST /knowledge/bases/:id/search (text search with previews)
- Existing 4 tests unchanged, typecheck clean
This commit is contained in:
saravanakumardb1 2026-03-20 00:42:12 -07:00
parent ae87371b3a
commit 20663d7078
2 changed files with 119 additions and 0 deletions

View File

@ -105,3 +105,65 @@ export async function listChunks(
limit: filter.limit ?? 500, limit: filter.limit ?? 500,
}); });
} }
export async function searchChunks(
knowledgeBaseId: string,
query: string,
options: { sourceId?: string; limit?: number } = {}
): Promise<KnowledgeChunkDoc[]> {
const allChunks = await listChunks(knowledgeBaseId, {
sourceId: options.sourceId,
limit: 1000,
});
const queryLower = query.toLowerCase();
const terms = queryLower.split(/\s+/).filter(Boolean);
const scored = allChunks
.map(chunk => {
const text = chunk.contentText.toLowerCase();
let score = 0;
for (const term of terms) {
if (text.includes(term)) score++;
}
return { chunk, score };
})
.filter(s => s.score > 0)
.sort((a, b) => b.score - a.score)
.slice(0, options.limit ?? 10);
return scored.map(s => s.chunk);
}
export interface KnowledgeBaseStats {
knowledgeBaseId: string;
sourceCount: number;
chunkCount: number;
totalTokens: number;
indexedSources: number;
pendingSources: number;
failedSources: number;
}
export async function getBaseStats(knowledgeBaseId: string): Promise<KnowledgeBaseStats> {
const sources = await listSources(knowledgeBaseId);
const chunks = await listChunks(knowledgeBaseId, { limit: 10000 });
return {
knowledgeBaseId,
sourceCount: sources.length,
chunkCount: chunks.length,
totalTokens: chunks.reduce((sum, c) => sum + (c.tokenCount ?? 0), 0),
indexedSources: sources.filter(s => s.status === 'indexed').length,
pendingSources: sources.filter(s => s.status === 'pending').length,
failedSources: sources.filter(s => s.status === 'failed').length,
};
}
export async function deleteSource(id: string, knowledgeBaseId: string): Promise<void> {
await sourceCollection().delete(id, knowledgeBaseId);
}
export async function deleteBase(id: string, productId: string): Promise<void> {
await baseCollection().delete(id, productId);
}

View File

@ -209,4 +209,61 @@ export async function knowledgeRoutes(app: FastifyInstance) {
})), })),
}; };
}); });
// ── Knowledge base stats ───────────────────────────────
app.get('/knowledge/bases/:id/stats', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
await repo.getBase(id, access.productId);
return repo.getBaseStats(id);
});
// ── Delete knowledge base (draft only) ─────────────────
app.delete('/knowledge/bases/:id', async req => {
const access = requireAdmin(req);
const { id } = req.params as { id: string };
const base = await repo.getBase(id, access.productId);
if (base.status !== 'draft') {
throw new BadRequestError('Only draft knowledge bases can be deleted');
}
await repo.deleteBase(id, access.productId);
return { deleted: true };
});
// ── Delete knowledge source ────────────────────────────
app.delete('/knowledge/bases/:id/sources/:sourceId', async req => {
const access = requireAdmin(req);
const { id, sourceId } = req.params as { id: string; sourceId: string };
await repo.getBase(id, access.productId);
await repo.getSource(sourceId, id);
await repo.deleteSource(sourceId, id);
return { deleted: true };
});
// ── Search chunks (text search) ────────────────────────
app.post('/knowledge/bases/:id/search', async req => {
requireAdmin(req);
const { id } = req.params as { id: string };
const parsed = RetrievalQuerySchema.safeParse(req.body);
if (!parsed.success) {
validationError(parsed.error.issues.map(issue => issue.message).join('; '));
}
const results = await repo.searchChunks(id, parsed.data.query, {
sourceId: parsed.data.sourceId,
limit: parsed.data.limit,
});
return {
query: parsed.data.query,
count: results.length,
chunks: results.map(c => ({
id: c.id,
sourceId: c.sourceId,
ordinal: c.ordinal,
preview: c.contentText.slice(0, 240),
tokenCount: c.tokenCount,
citations: c.citations,
tags: c.tags,
})),
};
});
} }