feat(search): add full-text search module — index, query, suggestions

- types.ts: SearchEntityType, SearchResultDoc, SearchIndexDoc, SearchSuggestionDoc + 3 Zod schemas
- repository.ts: index/remove entities, search with CONTAINS, prefix suggestions
- routes.ts: 6 endpoints (search, suggest, index entity, delete by entityType/entityId, stats, reindex)
- search.test.ts: 15 schema validation tests
- Parameterized queries (no manual quote escaping)
- Composite doc ID format: entityType:entityId
- Cosmos containers: search_index, search_suggestions
This commit is contained in:
saravanakumardb1 2026-03-19 23:49:02 -07:00
parent 7b43a02126
commit e9cb6b2a38
4 changed files with 656 additions and 0 deletions

View File

@ -0,0 +1,246 @@
/**
* Full-Text Search repository — Cosmos DB index + query.
* MVP uses CONTAINS() for text matching; upgradable to Azure Cognitive Search.
* @module search/repository
*/
import { getContainer } from '../../lib/cosmos.js';
import type {
SearchIndexDoc,
SearchResultDoc,
SearchSuggestionDoc,
SearchEntityType,
} from './types.js';
// =============================================================================
// Index Operations
// =============================================================================
/**
 * Upserts a document into the `search_index` container.
 *
 * @param doc - Index document to store.
 * @returns The resource echoed back by the upsert call.
 * @throws Error when the upsert response carries no resource.
 */
export async function indexEntity(doc: SearchIndexDoc): Promise<SearchIndexDoc> {
  const response = await getContainer('search_index').items.upsert(doc);
  const stored = response.resource;
  if (stored) {
    return stored as unknown as SearchIndexDoc;
  }
  throw new Error('Failed to index entity');
}
/**
 * Deletes one item from the `search_index` container.
 *
 * @param entityId - Id of the item to delete. The DELETE route passes the
 *   composite `entityType:entityId` document id here.
 * @param productId - Second argument to `container.item`
 *   (NOTE(review): presumably the partition key — confirm).
 * @returns `true` when the delete succeeds, `false` when it fails with code 404.
 * @throws Any failure whose `code` is not 404.
 */
export async function removeFromIndex(entityId: string, productId: string): Promise<boolean> {
  const index = getContainer('search_index');
  let deleted = true;
  try {
    await index.item(entityId, productId).delete();
  } catch (failure) {
    const notFound = (failure as { code?: number }).code === 404;
    if (!notFound) throw failure;
    deleted = false;
  }
  return deleted;
}
/**
 * Reads a single item from the `search_index` container.
 *
 * @param entityId - Id of the item to read (the routes use the composite
 *   `entityType:entityId` document id).
 * @param productId - Second argument to `container.item`
 *   (NOTE(review): presumably the partition key — confirm).
 * @returns The stored document, or `null` when it does not exist.
 * @throws Any read failure whose `code` is not 404.
 */
export async function getIndexEntry(
  entityId: string,
  productId: string
): Promise<SearchIndexDoc | null> {
  const container = getContainer('search_index');
  try {
    const { resource } = await container.item(entityId, productId).read();
    // A point read of a missing item can resolve with `resource: undefined`
    // instead of throwing; normalise that to `null` so the declared return
    // type holds at runtime.
    return (resource as unknown as SearchIndexDoc | undefined) ?? null;
  } catch (err) {
    if ((err as { code?: number }).code === 404) return null;
    throw err;
  }
}
/**
 * Summarises the search index for one product.
 *
 * Two independent queries are issued concurrently:
 *  - the `entityType` of every active document (tallied per type), and
 *  - the single most recent `indexedAt` value. This one is not restricted to
 *    active documents.
 *
 * @param productId - Product whose index entries are summarised.
 * @returns Count of active documents, a per-entity-type breakdown of those
 *   documents, and the latest `indexedAt` (or `null` when nothing is indexed).
 */
export async function getIndexStats(productId: string): Promise<{
  totalIndexed: number;
  byEntityType: Record<string, number>;
  lastIndexedAt: string | null;
}> {
  const container = getContainer('search_index');
  const parameters = [{ name: '@productId', value: productId }];
  const countQuery =
    'SELECT c.entityType FROM c WHERE c.productId = @productId AND c.isActive = true';
  const recentQuery =
    'SELECT TOP 1 c.indexedAt FROM c WHERE c.productId = @productId ORDER BY c.indexedAt DESC';

  // Neither query depends on the other's result, so run them in parallel.
  const [{ resources }, { resources: recentRes }] = await Promise.all([
    container.items.query<{ entityType: string }>({ query: countQuery, parameters }).fetchAll(),
    container.items.query<{ indexedAt: string }>({ query: recentQuery, parameters }).fetchAll(),
  ]);

  const byEntityType: Record<string, number> = {};
  for (const { entityType } of resources) {
    byEntityType[entityType] = (byEntityType[entityType] ?? 0) + 1;
  }

  return {
    totalIndexed: resources.length,
    byEntityType,
    lastIndexedAt: recentRes[0]?.indexedAt ?? null,
  };
}
// =============================================================================
// Search
// =============================================================================
/**
 * Builds a short excerpt of `text` around the first case-insensitive
 * occurrence of `needle` (which must already be lower-cased). Up to 50
 * characters of context are kept on each side and `...` marks truncation.
 * When there is no occurrence, the first 100 characters are returned.
 */
function buildSnippet(text: string, needle: string): string {
  const matchIdx = text.toLowerCase().indexOf(needle);
  if (matchIdx < 0) {
    return text.slice(0, 100) + (text.length > 100 ? '...' : '');
  }
  const start = Math.max(0, matchIdx - 50);
  const end = Math.min(text.length, matchIdx + needle.length + 50);
  return (start > 0 ? '...' : '') + text.slice(start, end) + (end < text.length ? '...' : '');
}

/** Coerces an optional paging value to a non-negative integer (0 when absent or invalid). */
function toPageInt(value: number | undefined): number {
  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
}

/**
 * Searches the `search_index` container for active documents of one product
 * whose `searchText` contains `query` (case-insensitive substring match).
 *
 * @param productId - Product scope of the search.
 * @param query - Text to look for; lower-cased before matching.
 * @param options - Optional filters and paging:
 *   - `entityTypes`: keep only documents whose `entityType` is in the list.
 *   - `tags`: keep only documents carrying at least one of the given tags
 *     (exact, case-sensitive match against `c.tags`).
 *   - `from` / `to`: inclusive bounds on `updatedAt`.
 *   - `limit` / `offset`: paging; applied only when `limit` is a positive
 *     number. Both are truncated to integers before being placed in the SQL.
 * @returns The page of results (newest `updatedAt` first) and the total number
 *   of documents matching the same filters.
 */
export async function search(
  productId: string,
  query: string,
  options?: {
    entityTypes?: SearchEntityType[];
    tags?: string[];
    from?: string;
    to?: string;
    limit?: number;
    offset?: number;
  }
): Promise<{ results: SearchResultDoc[]; total: number }> {
  const container = getContainer('search_index');
  // Values travel as query parameters, so no manual escaping is needed; only
  // lower-case for the case-insensitive comparison.
  const sanitizedQuery = query.toLowerCase();

  // The WHERE clause is assembled once and shared by the count and page
  // queries so that `total` always reflects exactly the same filters.
  const conditions = [
    'c.productId = @productId',
    'c.isActive = true',
    'CONTAINS(LOWER(c.searchText), @query)',
  ];
  const parameters = [
    { name: '@productId', value: productId },
    { name: '@query', value: sanitizedQuery },
  ];

  const entityTypes = options?.entityTypes ?? [];
  if (entityTypes.length > 0) {
    // One scalar parameter per value: an IN list cannot be bound as an array.
    const placeholders = entityTypes.map((et, i) => {
      parameters.push({ name: `@et${i}`, value: et });
      return `@et${i}`;
    });
    conditions.push(`c.entityType IN (${placeholders.join(', ')})`);
  }

  const tags = options?.tags ?? [];
  if (tags.length > 0) {
    // Any-of semantics, mirroring the IN filter used for entity types.
    const checks = tags.map((tag, i) => {
      parameters.push({ name: `@tag${i}`, value: tag });
      return `ARRAY_CONTAINS(c.tags, @tag${i})`;
    });
    conditions.push(`(${checks.join(' OR ')})`);
  }

  if (options?.from) {
    conditions.push('c.updatedAt >= @from');
    parameters.push({ name: '@from', value: options.from });
  }
  if (options?.to) {
    conditions.push('c.updatedAt <= @to');
    parameters.push({ name: '@to', value: options.to });
  }

  const whereClause = conditions.join(' AND ');
  const countSql = `SELECT VALUE COUNT(1) FROM c WHERE ${whereClause}`;
  let pageSql = `SELECT * FROM c WHERE ${whereClause} ORDER BY c.updatedAt DESC`;
  const limit = toPageInt(options?.limit);
  if (limit > 0) {
    pageSql += ` OFFSET ${toPageInt(options?.offset)} LIMIT ${limit}`;
  }

  // The count and the page are independent reads.
  const [{ resources: countRes }, { resources }] = await Promise.all([
    container.items.query<number>({ query: countSql, parameters }).fetchAll(),
    container.items.query<SearchIndexDoc>({ query: pageSql, parameters }).fetchAll(),
  ]);
  const total = countRes[0] ?? 0;

  const results: SearchResultDoc[] = resources.map((doc, idx) => {
    // Simple relevance: a title hit starts at 0.9, a body-only hit at 0.5;
    // each later position in the (recency-ordered) page loses 0.01.
    const titleMatch = doc.title.toLowerCase().includes(sanitizedQuery);
    const score = (titleMatch ? 0.9 : 0.5) - idx * 0.01;
    return {
      entityId: doc.entityId,
      entityType: doc.entityType,
      productId: doc.productId,
      title: doc.title,
      snippet: buildSnippet(doc.searchText, sanitizedQuery),
      score: Math.max(score, 0),
      deepLink: doc.deepLink,
      updatedAt: doc.updatedAt,
    };
  });

  return { results, total };
}
// =============================================================================
// Suggestions (popular searches)
// =============================================================================
/**
 * Records one occurrence of a search query in the `search_suggestions`
 * container: creates the suggestion with `queryCount: 1` on first sight,
 * otherwise increments the existing `queryCount`.
 *
 * The query is trimmed, lower-cased and cut to 100 characters. Queries that
 * are empty after trimming are ignored.
 *
 * NOTE(review): the read-then-upsert increment is not atomic, so concurrent
 * calls for the same query can still lose a count; a server-side patch would
 * be needed to close that gap.
 *
 * @param productId - Product the query was issued against.
 * @param queryText - Raw query text as typed by the user.
 * @throws Any container failure other than a 404 on read or a first 409 on create.
 */
export async function recordSearchQuery(productId: string, queryText: string): Promise<void> {
  const normalized = queryText.trim().toLowerCase().slice(0, 100);
  // Nothing worth suggesting; also avoids writing a document with empty text.
  if (normalized === '') return;

  const container = getContainer('search_suggestions');
  // '/', '\', '?' and '#' are not allowed in Cosmos item ids. Percent-encode
  // them (and '%' itself, so distinct queries never share an id). The stored
  // `text` keeps the original characters.
  const idSafeText = normalized.replace(
    /[%/\\?#]/g,
    ch => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  );
  const id = `${productId}:${idSafeText}`;

  // Two attempts: if a concurrent caller creates the document between our
  // read and our create (409), the second pass takes the increment path.
  for (let attempt = 0; attempt < 2; attempt += 1) {
    let existing: SearchSuggestionDoc | undefined;
    try {
      const { resource } = await container.item(id, productId).read();
      existing = resource as unknown as SearchSuggestionDoc | undefined;
    } catch (err) {
      if ((err as { code?: number }).code !== 404) throw err;
    }

    if (existing) {
      await container.items.upsert({
        ...existing,
        queryCount: existing.queryCount + 1,
        updatedAt: new Date().toISOString(),
      });
      return;
    }

    try {
      await container.items.create({
        id,
        productId,
        text: normalized,
        queryCount: 1,
        clickCount: 0,
        updatedAt: new Date().toISOString(),
      });
      return;
    } catch (err) {
      const conflict = (err as { code?: number }).code === 409;
      if (!conflict || attempt === 1) throw err;
    }
  }
}
/**
 * Returns the most frequently searched suggestions of a product whose text
 * starts with `prefix`.
 *
 * @param productId - Product scope.
 * @param prefix - Typed prefix; trimmed and lower-cased before matching.
 * @param limit - Maximum number of suggestions. Clamped to 1..50 and truncated
 *   to an integer; `NaN` falls back to the default of 10.
 * @returns Matching suggestion documents ordered by `queryCount` descending.
 */
export async function getSuggestions(
  productId: string,
  prefix: string,
  limit = 10
): Promise<SearchSuggestionDoc[]> {
  const container = getContainer('search_suggestions');
  // The limit is interpolated into the SQL text, so it must be a plain
  // integer: NaN or a fraction would otherwise yield `TOP NaN` / `TOP 2.5`.
  const requested = Number.isNaN(limit) ? 10 : limit;
  const safeLimit = Math.trunc(Math.min(Math.max(requested, 1), 50));
  const sanitizedPrefix = prefix.trim().toLowerCase();
  const query = `SELECT TOP ${safeLimit} * FROM c WHERE c.productId = @productId AND STARTSWITH(c.text, @prefix) ORDER BY c.queryCount DESC`;
  const parameters = [
    { name: '@productId', value: productId },
    { name: '@prefix', value: sanitizedPrefix },
  ];
  const { resources } = await container.items
    .query<SearchSuggestionDoc>({ query, parameters })
    .fetchAll();
  return resources;
}

View File

@ -0,0 +1,154 @@
/**
* Full-Text Search routes — search, index, suggestions.
* @module search/routes
*/
import type { FastifyInstance } from 'fastify';
import { UnauthorizedError, ForbiddenError, BadRequestError } from '../../lib/errors.js';
import { getRequestProductId } from '../../lib/request-context.js';
import { SearchQuerySchema, IndexEntitySchema, ReindexRequestSchema } from './types.js';
import * as repo from './repository.js';
/**
 * Ensures the request carries a JWT payload with a non-empty `sub`.
 *
 * @returns The `sub` claim of the JWT payload.
 * @throws UnauthorizedError when the payload or its `sub` is missing or empty.
 */
function requireAuth(req: { jwtPayload?: { sub: string } }): string {
  const subject = req.jwtPayload?.sub;
  if (subject) {
    return subject;
  }
  throw new UnauthorizedError('Authentication required');
}
/**
 * Ensures the request is authenticated and its JWT payload has role `admin`.
 * Authentication is checked first, so an anonymous request fails with the
 * authentication error rather than the role error.
 *
 * @returns The authenticated user's id as returned by `requireAuth`.
 * @throws ForbiddenError when the role is anything other than `admin`.
 */
function requireAdmin(req: { jwtPayload?: { sub: string; role?: string } }): string {
  const userId = requireAuth(req);
  const isAdmin = req.jwtPayload?.role === 'admin';
  if (isAdmin) {
    return userId;
  }
  throw new ForbiddenError('Admin access required');
}
/**
 * Registers the full-text search HTTP endpoints on the given Fastify instance.
 *
 * Endpoints:
 *  - `GET    /search`                              search (any authenticated user)
 *  - `GET    /search/suggestions`                  autocomplete (any authenticated user)
 *  - `POST   /search/index`                        index one entity (admin)
 *  - `DELETE /search/index/:entityType/:entityId`  remove one entity (admin)
 *  - `GET    /search/stats`                        index statistics (admin)
 *  - `POST   /search/reindex`                      acknowledge a reindex request (admin)
 *
 * Invalid query strings and request bodies are reported as `BadRequestError`.
 */
export async function searchRoutes(app: FastifyInstance): Promise<void> {
  // ── Search (any auth user) ─────────────────────────────────
  app.get('/search', async req => {
    requireAuth(req);
    const productId = getRequestProductId(req);
    const raw = req.query as Record<string, unknown>;

    // Query-string values arrive as strings: split the comma-separated lists
    // and convert the numeric fields before handing them to the schema.
    const parsed = SearchQuerySchema.safeParse({
      q: raw.q,
      entityTypes:
        typeof raw.entityTypes === 'string'
          ? raw.entityTypes.split(',').filter(Boolean)
          : undefined,
      tags: typeof raw.tags === 'string' ? raw.tags.split(',').filter(Boolean) : undefined,
      from: raw.from,
      to: raw.to,
      limit: raw.limit ? parseInt(String(raw.limit), 10) : undefined,
      offset: raw.offset ? parseInt(String(raw.offset), 10) : undefined,
    });
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }
    const { q, entityTypes, tags, from, to, limit, offset } = parsed.data;

    // Record the query for suggestions. Best-effort: never block or fail the
    // search, but leave a trace when recording goes wrong.
    void repo.recordSearchQuery(productId, q).catch((err: unknown) => {
      req.log.warn({ err }, 'Failed to record search query');
    });

    const { results, total } = await repo.search(productId, q, {
      entityTypes,
      tags,
      from,
      to,
      limit,
      offset,
    });
    return { query: q, results, total, limit, offset };
  });

  // ── Suggestions (autocomplete) ─────────────────────────────
  app.get('/search/suggestions', async req => {
    requireAuth(req);
    const productId = getRequestProductId(req);
    const { q, limit: limitStr } = req.query as { q?: unknown; limit?: unknown };

    // A repeated `q` parameter arrives as an array; treat anything that is
    // not a string of at least two visible characters as "no prefix yet".
    const prefix = typeof q === 'string' ? q.trim() : '';
    if (prefix.length < 2) {
      return { suggestions: [] };
    }

    const parsedLimit = typeof limitStr === 'string' ? parseInt(limitStr, 10) : 10;
    const safeLimit =
      Number.isFinite(parsedLimit) && parsedLimit > 0 ? Math.min(parsedLimit, 50) : 10;
    const suggestions = await repo.getSuggestions(productId, prefix, safeLimit);
    return { suggestions: suggestions.map(s => s.text) };
  });

  // ── Index entity (admin or system) ─────────────────────────
  app.post('/search/index', async (req, reply) => {
    requireAdmin(req);
    const productId = getRequestProductId(req);
    // Same validation style as GET /search: schema failures become a 400.
    const parsed = IndexEntitySchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }
    const input = parsed.data;
    const now = new Date().toISOString();
    const doc = {
      // Composite document id; the DELETE route rebuilds the same value.
      id: `${input.entityType}:${input.entityId}`,
      productId,
      entityId: input.entityId,
      entityType: input.entityType,
      searchText: input.searchText,
      title: input.title,
      deepLink: input.deepLink,
      tags: input.tags,
      isActive: input.isActive,
      indexedAt: now,
      updatedAt: now,
    };
    const indexed = await repo.indexEntity(doc);
    req.log.info({ entityId: input.entityId, entityType: input.entityType }, 'Entity indexed');
    reply.status(201);
    return indexed;
  });

  // ── Remove from index ──────────────────────────────────────
  // id format is "entityType:entityId" (e.g. "user:user-123")
  app.delete<{ Params: { entityType: string; entityId: string } }>(
    '/search/index/:entityType/:entityId',
    async (req, reply) => {
      requireAdmin(req);
      const productId = getRequestProductId(req);
      const { entityType, entityId } = req.params;
      const docId = `${entityType}:${entityId}`;
      const removed = await repo.removeFromIndex(docId, productId);
      if (!removed) {
        // NOTE(review): a not-found error would describe this better than a
        // bad-request one, if the shared errors module provides it.
        throw new BadRequestError('Entity not found in index');
      }
      req.log.info({ entityId, entityType }, 'Entity removed from index');
      reply.status(204);
      return;
    }
  );

  // ── Index stats ────────────────────────────────────────────
  app.get('/search/stats', async req => {
    requireAdmin(req);
    const productId = getRequestProductId(req);
    return repo.getIndexStats(productId);
  });

  // ── Reindex request (admin trigger) ────────────────────────
  app.post('/search/reindex', async req => {
    const userId = requireAdmin(req);
    const productId = getRequestProductId(req);
    const parsed = ReindexRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }
    const input = parsed.data;
    // In production, this would enqueue an async job.
    // For MVP, only log the request and return an acknowledgment.
    req.log.info({ productId, userId, entityTypes: input.entityTypes }, 'Reindex requested');
    return {
      status: 'accepted',
      entityTypes: input.entityTypes,
      message: 'Reindex job queued. Check /search/stats for progress.',
    };
  });
}

View File

@ -0,0 +1,141 @@
/**
* Full-Text Search module unit tests.
*/
import { describe, it, expect } from 'vitest';
import { SearchQuerySchema, IndexEntitySchema, ReindexRequestSchema } from './types.js';
describe('SearchQuerySchema', () => {
  /** Runs the schema and reports only whether the input was accepted. */
  const accepts = (input: unknown): boolean => SearchQuerySchema.safeParse(input).success;

  it('validates minimal search query', () => {
    const outcome = SearchQuerySchema.safeParse({ q: 'hello' });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    // Defaults supplied by the schema.
    expect(outcome.data.limit).toBe(20);
    expect(outcome.data.offset).toBe(0);
  });

  it('validates with all options', () => {
    const everything = {
      q: 'search term',
      entityTypes: ['user', 'item'],
      tags: ['billing', 'support'],
      from: '2026-01-01T00:00:00.000Z',
      to: '2026-12-31T23:59:59.000Z',
      limit: 50,
      offset: 10,
    };
    const outcome = SearchQuerySchema.safeParse(everything);
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.entityTypes).toEqual(['user', 'item']);
    expect(outcome.data.limit).toBe(50);
  });

  it('rejects empty query', () => {
    expect(accepts({ q: '' })).toBe(false);
  });

  it('rejects query over 500 chars', () => {
    const tooLong = 'x'.repeat(501);
    expect(accepts({ q: tooLong })).toBe(false);
  });

  it('rejects invalid entity type', () => {
    expect(accepts({ q: 'test', entityTypes: ['invalid_type'] })).toBe(false);
  });

  it('rejects limit over 100', () => {
    expect(accepts({ q: 'test', limit: 101 })).toBe(false);
  });

  it('rejects negative offset', () => {
    expect(accepts({ q: 'test', offset: -1 })).toBe(false);
  });
});
describe('IndexEntitySchema', () => {
  /** Smallest payload the rejection cases start from; each case overrides one aspect. */
  const baseEntry = {
    entityId: 'x1',
    entityType: 'user',
    title: 'Test',
    searchText: 'Test',
    deepLink: '/test',
  };
  /** Runs the schema and reports only whether the input was accepted. */
  const accepts = (input: unknown): boolean => IndexEntitySchema.safeParse(input).success;

  it('validates minimal index entry', () => {
    const outcome = IndexEntitySchema.safeParse({
      entityId: 'user-123',
      entityType: 'user',
      title: 'John Doe',
      searchText: 'John Doe john@example.com admin',
      deepLink: '/users/user-123',
    });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    // Defaults supplied by the schema.
    expect(outcome.data.tags).toEqual([]);
    expect(outcome.data.isActive).toBe(true);
  });

  it('validates with tags', () => {
    const tagged = {
      entityId: 'item-456',
      entityType: 'item',
      title: 'Fix login bug',
      searchText: 'Fix login bug authentication error password reset',
      deepLink: '/items/item-456',
      tags: ['bug', 'auth', 'p0'],
      isActive: true,
    };
    expect(accepts(tagged)).toBe(true);
  });

  it('rejects empty entityId', () => {
    expect(accepts({ ...baseEntry, entityId: '', searchText: 'Test content' })).toBe(false);
  });

  it('rejects invalid entityType', () => {
    expect(accepts({ ...baseEntry, entityType: 'invalid' })).toBe(false);
  });

  it('rejects too many tags', () => {
    const tags = Array.from({ length: 21 }, (_, i) => `tag${i}`);
    expect(accepts({ ...baseEntry, tags })).toBe(false);
  });
});
describe('ReindexRequestSchema', () => {
  /** Runs the schema against a candidate `entityTypes` list and reports acceptance. */
  const acceptsTypes = (entityTypes: unknown[]): boolean =>
    ReindexRequestSchema.safeParse({ entityTypes }).success;

  it('validates with entity types', () => {
    expect(acceptsTypes(['user', 'item', 'broadcast'])).toBe(true);
  });

  it('rejects empty entity types', () => {
    expect(acceptsTypes([])).toBe(false);
  });

  it('rejects invalid entity type', () => {
    expect(acceptsTypes(['unknown'])).toBe(false);
  });
});

View File

@ -0,0 +1,115 @@
/**
* Full-Text Search module types and schemas.
* Provides cross-entity search across platform data (users, items, notes, etc.).
* Uses Cosmos DB SQL queries with CONTAINS/STARTSWITH for MVP, upgradable to
* Azure Cognitive Search or Elasticsearch later.
*/
import { z } from 'zod';
// ── Searchable Entity Types ──────────────────────────────────────
/**
 * Kinds of entity that can be stored in and returned from the search index.
 * The Zod schemas in this module validate against the same set of literals;
 * keep the two in sync when adding or removing a kind.
 */
export type SearchEntityType =
| 'user'
| 'item'
| 'broadcast'
| 'survey'
| 'changelog'
| 'audit_log'
| 'knowledge_article';
/** One search hit as returned to API clients by the search repository. */
export interface SearchResultDoc {
/** Entity ID */
entityId: string;
/** Entity type */
entityType: SearchEntityType;
/** Product scope */
productId: string;
/** Display title / name */
title: string;
/**
 * Excerpt of the indexed text around the first match, with `...` marking
 * truncated ends. No highlight markers are inserted by the repository.
 */
snippet: string;
/** Relevance score (0-1) */
score: number;
/** Deep link path within the product */
deepLink: string;
/** When the entity was last modified */
updatedAt: string;
}
/** Document shape stored in the `search_index` container. */
export interface SearchIndexDoc {
/** Document id; the index route builds it as `entityType:entityId`. */
id: string;
/** Product scope */
productId: string;
/** ID of the indexed entity (without the entity-type prefix) */
entityId: string;
/** Kind of the indexed entity */
entityType: SearchEntityType;
/** Searchable text (title + body + tags combined) */
searchText: string;
/** Display title */
title: string;
/** Deep link path */
deepLink: string;
/** Tags / categories for faceted search */
tags: string[];
/** Whether the entity is active / not deleted */
isActive: boolean;
/** ISO timestamp set by the index route when the document is written */
indexedAt: string;
/** ISO timestamp; search results are ordered and date-filtered by this field */
updatedAt: string;
}
/** Document shape stored in the `search_suggestions` container (one per product and query text). */
export interface SearchSuggestionDoc {
/** Document id, built by the repository from the product id and the normalized query text */
id: string;
/** Product scope */
productId: string;
/** The suggestion text */
text: string;
/** How many times this query was searched */
queryCount: number;
/**
 * How many times a result was clicked from this query.
 * NOTE(review): initialised to 0 when the suggestion is created; no code in
 * this module increments it — confirm where click tracking happens.
 */
clickCount: number;
/** ISO timestamp of the last write to this suggestion */
updatedAt: string;
}
// ── Schemas ──────────────────────────────────────────────────────
/**
 * Zod enum of every searchable entity kind. Shared by all request schemas
 * below so the list is written once; it must mirror `SearchEntityType`.
 */
const searchEntityTypeSchema = z.enum([
  'user',
  'item',
  'broadcast',
  'survey',
  'changelog',
  'audit_log',
  'knowledge_article',
]);

/**
 * Validates the (already type-converted) query parameters of `GET /search`.
 * `limit` defaults to 20 (1..100) and `offset` defaults to 0.
 */
export const SearchQuerySchema = z.object({
  q: z.string().min(1).max(500),
  entityTypes: z.array(searchEntityTypeSchema).optional(),
  tags: z.array(z.string().max(64)).max(10).optional(),
  from: z.string().datetime().optional(),
  to: z.string().datetime().optional(),
  limit: z.number().int().min(1).max(100).default(20),
  offset: z.number().int().min(0).default(0),
});

/**
 * Validates the body of `POST /search/index`.
 * `tags` defaults to an empty list (at most 20) and `isActive` defaults to `true`.
 */
export const IndexEntitySchema = z.object({
  entityId: z.string().min(1),
  entityType: searchEntityTypeSchema,
  title: z.string().min(1).max(500),
  searchText: z.string().min(1).max(10000),
  deepLink: z.string().min(1).max(512),
  tags: z.array(z.string().max(64)).max(20).default([]),
  isActive: z.boolean().default(true),
});

/** Validates the body of `POST /search/reindex`; at least one entity type is required. */
export const ReindexRequestSchema = z.object({
  entityTypes: z.array(searchEntityTypeSchema).min(1),
});
/** Parsed result type of `SearchQuerySchema` (schema defaults already applied). */
export type SearchQueryInput = z.infer<typeof SearchQuerySchema>;
/** Parsed result type of `IndexEntitySchema` (schema defaults already applied). */
export type IndexEntityInput = z.infer<typeof IndexEntitySchema>;
/** Parsed result type of `ReindexRequestSchema`. */
export type ReindexRequestInput = z.infer<typeof ReindexRequestSchema>;