From e9cb6b2a382b02961491bb25db0b520805bc0e16 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Thu, 19 Mar 2026 23:49:02 -0700 Subject: [PATCH] =?UTF-8?q?feat(search):=20add=20full-text=20search=20modu?= =?UTF-8?q?le=20=E2=80=94=20index,=20query,=20suggestions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - types.ts: SearchIndexDoc, SearchSuggestion + 3 Zod schemas - repository.ts: index/remove entities, search with CONTAINS, prefix suggestions - routes.ts: 6 endpoints (search, suggest, index entity, reindex, delete by type:id) - search.test.ts: 15 schema validation tests - Parameterized queries (no manual quote escaping) - Composite doc ID format: entityType:entityId - Cosmos containers: search_index, search_suggestions --- .../src/modules/search/repository.ts | 246 ++++++++++++++++++ .../src/modules/search/routes.ts | 154 +++++++++++ .../src/modules/search/search.test.ts | 141 ++++++++++ .../src/modules/search/types.ts | 115 ++++++++ 4 files changed, 656 insertions(+) create mode 100644 services/platform-service/src/modules/search/repository.ts create mode 100644 services/platform-service/src/modules/search/routes.ts create mode 100644 services/platform-service/src/modules/search/search.test.ts create mode 100644 services/platform-service/src/modules/search/types.ts diff --git a/services/platform-service/src/modules/search/repository.ts b/services/platform-service/src/modules/search/repository.ts new file mode 100644 index 00000000..d2e023a1 --- /dev/null +++ b/services/platform-service/src/modules/search/repository.ts @@ -0,0 +1,246 @@ +/** + * Full-Text Search repository — Cosmos DB index + query. + * MVP uses CONTAINS() for text matching; upgradable to Azure Cognitive Search. 
+ * @module search/repository
+ */
+
+import { getContainer } from '../../lib/cosmos.js';
+import type {
+  SearchIndexDoc,
+  SearchResultDoc,
+  SearchSuggestionDoc,
+  SearchEntityType,
+} from './types.js';
+
+/** Characters of context kept on each side of the match when building a snippet. */
+const SNIPPET_CONTEXT = 50;
+
+/** Snippet length used when the match cannot be located in the text. */
+const SNIPPET_FALLBACK_LENGTH = 100;
+
+/** Stored queries are truncated to this many characters. */
+const MAX_SUGGESTION_LENGTH = 100;
+
+/** Default / maximum number of suggestions returned by {@link getSuggestions}. */
+const DEFAULT_SUGGESTION_LIMIT = 10;
+const MAX_SUGGESTION_LIMIT = 50;
+
+/**
+ * Characters that must not appear in a Cosmos DB document `id` (`/`, `\`, `?`, `#`),
+ * plus `%`, which is used as the escape character so encoded ids stay unambiguous.
+ */
+const UNSAFE_ID_CHARS = /[%\/\\?#]/g;
+
+/** SQL parameter shape used by every query in this module (all values are strings). */
+type StringParameter = { name: string; value: string };
+
+/** True when `err` carries the numeric `code` 404 this module treats as "not found". */
+function isNotFound(err: unknown): boolean {
+  return (err as { code?: number } | null | undefined)?.code === 404;
+}
+
+/** Percent-encodes one id-unsafe character, e.g. `/` becomes `%2f`. */
+function encodeIdChar(ch: string): string {
+  return `%${ch.charCodeAt(0).toString(16).padStart(2, '0')}`;
+}
+
+/**
+ * Builds a short excerpt of `text` around the first case-insensitive occurrence
+ * of `needle` (which must already be lower-cased). Falls back to the head of the
+ * text when the needle is not found.
+ */
+function buildSnippet(text: string, needle: string): string {
+  const matchIdx = text.toLowerCase().indexOf(needle);
+  if (matchIdx < 0) {
+    const head = text.slice(0, SNIPPET_FALLBACK_LENGTH);
+    return text.length > SNIPPET_FALLBACK_LENGTH ? `${head}...` : head;
+  }
+
+  const start = Math.max(0, matchIdx - SNIPPET_CONTEXT);
+  const end = Math.min(text.length, matchIdx + needle.length + SNIPPET_CONTEXT);
+  return (start > 0 ? '...' : '') + text.slice(start, end) + (end < text.length ? '...' : '');
+}
+
+// =============================================================================
+// Index Operations
+// =============================================================================
+
+/**
+ * Upserts a document into the `search_index` container.
+ *
+ * @param doc - Fully populated index document (the caller chooses `id`).
+ * @returns The stored document as echoed back by the upsert call.
+ * @throws Error when the upsert returns no resource.
+ */
+export async function indexEntity(doc: SearchIndexDoc): Promise<SearchIndexDoc> {
+  const container = getContainer('search_index');
+  const { resource } = await container.items.upsert(doc);
+  if (!resource) throw new Error('Failed to index entity');
+  return resource as unknown as SearchIndexDoc;
+}
+
+/**
+ * Deletes one document from the `search_index` container.
+ *
+ * @param entityId - The document id. The routes in this module pass the
+ *   composite `entityType:entityId` value here, not the bare entity id.
+ * @param productId - Second argument to `container.item()`
+ *   (presumably the partition key — confirm against the container definition).
+ * @returns `true` when a document was deleted, `false` when the delete failed with code 404.
+ * @throws Any error other than a 404 is re-thrown unchanged.
+ */
+export async function removeFromIndex(entityId: string, productId: string): Promise<boolean> {
+  const container = getContainer('search_index');
+  try {
+    await container.item(entityId, productId).delete();
+    return true;
+  } catch (err) {
+    if (isNotFound(err)) return false;
+    throw err;
+  }
+}
+
+/**
+ * Reads one document from the `search_index` container.
+ *
+ * @param entityId - The document id (see {@link removeFromIndex} for the format used by the routes).
+ * @param productId - Second argument to `container.item()`.
+ * @returns The document, or `null` when the read yields no resource or fails with code 404.
+ * @throws Any error other than a 404 is re-thrown unchanged.
+ */
+export async function getIndexEntry(
+  entityId: string,
+  productId: string
+): Promise<SearchIndexDoc | null> {
+  const container = getContainer('search_index');
+  try {
+    const { resource } = await container.item(entityId, productId).read();
+    // A missing item may surface as an undefined resource instead of a thrown 404;
+    // normalise that to null so the declared return type holds.
+    return (resource as unknown as SearchIndexDoc | undefined) ?? null;
+  } catch (err) {
+    if (isNotFound(err)) return null;
+    throw err;
+  }
+}
+
+/**
+ * Summarises the index for one product.
+ *
+ * `totalIndexed` and `byEntityType` only count documents with `isActive = true`;
+ * `lastIndexedAt` considers every document of the product, active or not.
+ *
+ * NOTE(review): the per-type counts are computed client-side from one projected
+ * row per active document. A `GROUP BY c.entityType` aggregate would move less
+ * data if the SDK version in use supports it.
+ *
+ * @param productId - Product whose index is summarised.
+ */
+export async function getIndexStats(productId: string): Promise<{
+  totalIndexed: number;
+  byEntityType: Record<string, number>;
+  lastIndexedAt: string | null;
+}> {
+  const container = getContainer('search_index');
+  const parameters: StringParameter[] = [{ name: '@productId', value: productId }];
+
+  const countQuery =
+    'SELECT c.entityType FROM c WHERE c.productId = @productId AND c.isActive = true';
+  const recentQuery =
+    'SELECT TOP 1 c.indexedAt FROM c WHERE c.productId = @productId ORDER BY c.indexedAt DESC';
+
+  // The two queries are independent, so run them concurrently.
+  const [{ resources }, { resources: recentRes }] = await Promise.all([
+    container.items.query<{ entityType: string }>({ query: countQuery, parameters }).fetchAll(),
+    container.items.query<{ indexedAt: string }>({ query: recentQuery, parameters }).fetchAll(),
+  ]);
+
+  const byEntityType: Record<string, number> = {};
+  for (const row of resources) {
+    byEntityType[row.entityType] = (byEntityType[row.entityType] ?? 0) + 1;
+  }
+
+  return {
+    totalIndexed: resources.length,
+    byEntityType,
+    lastIndexedAt: recentRes[0]?.indexedAt ?? null,
+  };
+}
+
+// =============================================================================
+// Search
+// =============================================================================
+
+/**
+ * Case-insensitive substring search over `searchText` for one product.
+ *
+ * All filters are applied in SQL, so `total` is the number of documents that
+ * match every filter, independent of pagination.
+ *
+ * @param productId - Product scope; only active documents of this product match.
+ * @param query - Text to look for.
+ * @param options.entityTypes - Restrict results to these entity types.
+ * @param options.tags - Restrict results to documents carrying at least one of these tags.
+ * @param options.from - Inclusive lower bound on `updatedAt` (compared as a string).
+ * @param options.to - Inclusive upper bound on `updatedAt` (compared as a string).
+ * @param options.limit - Page size. Pagination is only applied for a finite value greater than 0.
+ * @param options.offset - Rows to skip; ignored unless `limit` is applied.
+ * @returns The page of results, newest `updatedAt` first, plus the total match count.
+ */
+export async function search(
+  productId: string,
+  query: string,
+  options?: {
+    entityTypes?: SearchEntityType[];
+    tags?: string[];
+    from?: string;
+    to?: string;
+    limit?: number;
+    offset?: number;
+  }
+): Promise<{ results: SearchResultDoc[]; total: number }> {
+  const container = getContainer('search_index');
+
+  // Lower-cased copy used for the client-side title check and snippet lookup.
+  const sanitizedQuery = query.toLowerCase();
+
+  // CONTAINS(..., true) matches case-insensitively without wrapping the field in LOWER().
+  const conditions = [
+    'c.productId = @productId',
+    'c.isActive = true',
+    'CONTAINS(c.searchText, @query, true)',
+  ];
+  const parameters: StringParameter[] = [
+    { name: '@productId', value: productId },
+    { name: '@query', value: sanitizedQuery },
+  ];
+
+  if (options?.entityTypes && options.entityTypes.length > 0) {
+    // One named parameter per value; array-valued parameters are avoided.
+    const placeholders = options.entityTypes.map((_, i) => `@et${i}`);
+    conditions.push(`c.entityType IN (${placeholders.join(', ')})`);
+    options.entityTypes.forEach((et, i) => {
+      parameters.push({ name: `@et${i}`, value: et });
+    });
+  }
+
+  if (options?.tags && options.tags.length > 0) {
+    // Match documents that carry at least one of the requested tags.
+    const tagChecks = options.tags.map((_, i) => `ARRAY_CONTAINS(c.tags, @tag${i})`);
+    conditions.push(`(${tagChecks.join(' OR ')})`);
+    options.tags.forEach((tag, i) => {
+      parameters.push({ name: `@tag${i}`, value: tag });
+    });
+  }
+
+  if (options?.from) {
+    conditions.push('c.updatedAt >= @from');
+    parameters.push({ name: '@from', value: options.from });
+  }
+
+  if (options?.to) {
+    conditions.push('c.updatedAt <= @to');
+    parameters.push({ name: '@to', value: options.to });
+  }
+
+  const where = conditions.join(' AND ');
+  const countSql = `SELECT VALUE COUNT(1) FROM c WHERE ${where}`;
+  let pageSql = `SELECT * FROM c WHERE ${where} ORDER BY c.updatedAt DESC`;
+
+  // OFFSET/LIMIT are interpolated, so only ever emit whole, in-range numbers.
+  const limit = options?.limit;
+  if (limit !== undefined && Number.isFinite(limit) && limit > 0) {
+    const safeLimit = Math.max(1, Math.trunc(limit));
+    const rawOffset = options?.offset ?? 0;
+    const safeOffset = Number.isFinite(rawOffset) ? Math.max(0, Math.trunc(rawOffset)) : 0;
+    pageSql += ` OFFSET ${safeOffset} LIMIT ${safeLimit}`;
+  }
+
+  // Count and page queries do not depend on each other.
+  const [{ resources: countRes }, { resources }] = await Promise.all([
+    container.items.query<number>({ query: countSql, parameters }).fetchAll(),
+    container.items.query<SearchIndexDoc>({ query: pageSql, parameters }).fetchAll(),
+  ]);
+  const total = countRes[0] ?? 0;
+
+  const results: SearchResultDoc[] = resources.map((doc, idx) => {
+    // Basic relevance: a title hit starts at 0.9, a body-only hit at 0.5, and the
+    // score decays by 0.01 per position within the page (never below 0).
+    const titleMatch = doc.title.toLowerCase().includes(sanitizedQuery);
+    const score = (titleMatch ? 0.9 : 0.5) - idx * 0.01;
+
+    return {
+      entityId: doc.entityId,
+      entityType: doc.entityType,
+      productId: doc.productId,
+      title: doc.title,
+      snippet: buildSnippet(doc.searchText, sanitizedQuery),
+      score: Math.max(score, 0),
+      deepLink: doc.deepLink,
+      updatedAt: doc.updatedAt,
+    };
+  });
+
+  return { results, total };
+}
+
+// =============================================================================
+// Suggestions (popular searches)
+// =============================================================================
+
+/**
+ * Records one occurrence of a search query in the `search_suggestions` container.
+ *
+ * The query is trimmed, lower-cased and truncated to 100 characters. Queries that
+ * are empty after trimming are not recorded. An existing entry has `queryCount`
+ * incremented; otherwise a new entry is created with `queryCount` 1.
+ *
+ * NOTE(review): the read-then-upsert sequence is not atomic, so concurrent calls
+ * for the same query can lose increments or collide on the initial create.
+ *
+ * @param productId - Product the query was issued against.
+ * @param queryText - Raw query text as typed by the user.
+ */
+export async function recordSearchQuery(productId: string, queryText: string): Promise<void> {
+  const normalized = queryText.trim().toLowerCase().slice(0, MAX_SUGGESTION_LENGTH);
+  if (!normalized) return;
+
+  const container = getContainer('search_suggestions');
+  // Free-form text can contain characters that are illegal in a document id; escape them.
+  const id = `${productId}:${normalized.replace(UNSAFE_ID_CHARS, encodeIdChar)}`;
+
+  try {
+    const { resource: existing } = await container.item(id, productId).read();
+    if (existing) {
+      const doc = existing as unknown as SearchSuggestionDoc;
+      await container.items.upsert({
+        ...doc,
+        queryCount: doc.queryCount + 1,
+        updatedAt: new Date().toISOString(),
+      });
+      return;
+    }
+  } catch (err) {
+    if (!isNotFound(err)) throw err;
+  }
+
+  // Create new suggestion
+  await container.items.create({
+    id,
+    productId,
+    text: normalized,
+    queryCount: 1,
+    clickCount: 0,
+    updatedAt: new Date().toISOString(),
+  });
+}
+
+/**
+ * Returns stored queries of a product that start with `prefix`, most searched first.
+ *
+ * @param productId - Product scope.
+ * @param prefix - Prefix to match; trimmed and lower-cased before querying.
+ * @param limit - Maximum number of suggestions; truncated to an integer and clamped
+ *   to 1..50. A non-finite value falls back to 10.
+ */
+export async function getSuggestions(
+  productId: string,
+  prefix: string,
+  limit = 10
+): Promise<SearchSuggestionDoc[]> {
+  const container = getContainer('search_suggestions');
+  // TOP is interpolated into the query text, so it must be a whole number in range.
+  const safeLimit = Number.isFinite(limit)
+    ? Math.min(Math.max(Math.trunc(limit), 1), MAX_SUGGESTION_LIMIT)
+    : DEFAULT_SUGGESTION_LIMIT;
+  const sanitizedPrefix = prefix.trim().toLowerCase();
+
+  const query = `SELECT TOP ${safeLimit} * FROM c WHERE c.productId = @productId AND STARTSWITH(c.text, @prefix) ORDER BY c.queryCount DESC`;
+  const parameters: StringParameter[] = [
+    { name: '@productId', value: productId },
+    { name: '@prefix', value: sanitizedPrefix },
+  ];
+
+  const { resources } = await container.items
+    .query<SearchSuggestionDoc>({ query, parameters })
+    .fetchAll();
+
+  return resources;
+}
diff --git a/services/platform-service/src/modules/search/routes.ts b/services/platform-service/src/modules/search/routes.ts
new file mode 100644
index 00000000..84a79218
--- /dev/null
+++ 
b/services/platform-service/src/modules/search/routes.ts
@@ -0,0 +1,154 @@
+/**
+ * Full-Text Search routes — search, index, suggestions.
+ * @module search/routes
+ */
+
+import type { FastifyInstance } from 'fastify';
+import { UnauthorizedError, ForbiddenError, BadRequestError } from '../../lib/errors.js';
+import { getRequestProductId } from '../../lib/request-context.js';
+import { SearchQuerySchema, IndexEntitySchema, ReindexRequestSchema } from './types.js';
+import * as repo from './repository.js';
+
+/** Default and maximum number of suggestions returned by the autocomplete route. */
+const DEFAULT_SUGGESTION_LIMIT = 10;
+const MAX_SUGGESTION_LIMIT = 50;
+
+/** Shortest (trimmed) prefix for which suggestions are looked up. */
+const MIN_SUGGESTION_PREFIX = 2;
+
+/**
+ * Returns the caller's user id (`jwtPayload.sub`).
+ * @throws UnauthorizedError when the request carries no JWT subject.
+ */
+function requireAuth(req: { jwtPayload?: { sub: string } }): string {
+  if (!req.jwtPayload?.sub) throw new UnauthorizedError('Authentication required');
+  return req.jwtPayload.sub;
+}
+
+/**
+ * Returns the caller's user id after checking that the JWT role is `admin`.
+ * @throws UnauthorizedError when unauthenticated, ForbiddenError when not an admin.
+ */
+function requireAdmin(req: { jwtPayload?: { sub: string; role?: string } }): string {
+  const userId = requireAuth(req);
+  if (req.jwtPayload?.role !== 'admin') throw new ForbiddenError('Admin access required');
+  return userId;
+}
+
+/** Turns schema validation issues into the BadRequestError used by every route here. */
+function validationError(issues: ReadonlyArray<{ message: string }>): BadRequestError {
+  return new BadRequestError(issues.map(i => i.message).join('; '));
+}
+
+/**
+ * Registers the search endpoints on `app`:
+ * `GET /search`, `GET /search/suggestions`, `POST /search/index`,
+ * `DELETE /search/index/:entityType/:entityId`, `GET /search/stats`, `POST /search/reindex`.
+ */
+export async function searchRoutes(app: FastifyInstance): Promise<void> {
+  // ── Search (any auth user) ─────────────────────────────────
+  app.get('/search', async req => {
+    requireAuth(req);
+    const productId = getRequestProductId(req);
+    const raw = req.query as Record<string, unknown>;
+
+    // Query-string values arrive as strings: split the comma-separated lists and
+    // convert the numbers before handing everything to the schema.
+    const parsed = SearchQuerySchema.safeParse({
+      q: raw.q,
+      entityTypes:
+        typeof raw.entityTypes === 'string'
+          ? raw.entityTypes.split(',').filter(Boolean)
+          : undefined,
+      tags: typeof raw.tags === 'string' ? raw.tags.split(',').filter(Boolean) : undefined,
+      from: raw.from,
+      to: raw.to,
+      limit: raw.limit ? parseInt(String(raw.limit), 10) : undefined,
+      offset: raw.offset ? parseInt(String(raw.offset), 10) : undefined,
+    });
+
+    if (!parsed.success) {
+      throw validationError(parsed.error.issues);
+    }
+
+    const { q, entityTypes, tags, from, to, limit, offset } = parsed.data;
+
+    // Record the query for suggestions. Best-effort: never blocks or fails the
+    // search, but a failure is logged instead of being dropped silently.
+    void repo.recordSearchQuery(productId, q).catch((err: unknown) => {
+      req.log.warn({ err }, 'Failed to record search query');
+    });
+
+    const { results, total } = await repo.search(productId, q, {
+      entityTypes,
+      tags,
+      from,
+      to,
+      limit,
+      offset,
+    });
+
+    return { query: q, results, total, limit, offset };
+  });
+
+  // ── Suggestions (autocomplete) ─────────────────────────────
+  app.get('/search/suggestions', async req => {
+    requireAuth(req);
+    const productId = getRequestProductId(req);
+    const { q, limit: limitStr } = req.query as { q?: unknown; limit?: unknown };
+
+    // A repeated `q` parameter is not a string; treat it like a missing prefix.
+    // Trim before the length check so whitespace does not count towards the minimum.
+    const prefix = typeof q === 'string' ? q.trim() : '';
+    if (prefix.length < MIN_SUGGESTION_PREFIX) {
+      return { suggestions: [] };
+    }
+
+    const parsedLimit =
+      typeof limitStr === 'string' ? parseInt(limitStr, 10) : DEFAULT_SUGGESTION_LIMIT;
+    const safeLimit =
+      Number.isFinite(parsedLimit) && parsedLimit > 0
+        ? Math.min(parsedLimit, MAX_SUGGESTION_LIMIT)
+        : DEFAULT_SUGGESTION_LIMIT;
+
+    const suggestions = await repo.getSuggestions(productId, prefix, safeLimit);
+    return { suggestions: suggestions.map(s => s.text) };
+  });
+
+  // ── Index entity (admin or system) ─────────────────────────
+  app.post('/search/index', async (req, reply) => {
+    requireAdmin(req);
+    const productId = getRequestProductId(req);
+
+    // Same validation style as GET /search: invalid bodies become a BadRequestError.
+    const parsed = IndexEntitySchema.safeParse(req.body);
+    if (!parsed.success) {
+      throw validationError(parsed.error.issues);
+    }
+    const input = parsed.data;
+
+    const now = new Date().toISOString();
+    const doc = {
+      // Composite id, so different entity types can reuse the same entityId.
+      id: `${input.entityType}:${input.entityId}`,
+      productId,
+      entityId: input.entityId,
+      entityType: input.entityType,
+      searchText: input.searchText,
+      title: input.title,
+      deepLink: input.deepLink,
+      tags: input.tags,
+      isActive: input.isActive,
+      indexedAt: now,
+      updatedAt: now,
+    };
+
+    // The repository upserts, so re-posting an entity replaces its index entry.
+    const indexed = await repo.indexEntity(doc);
+    req.log.info({ entityId: input.entityId, entityType: input.entityType }, 'Entity indexed');
+    reply.status(201);
+    return indexed;
+  });
+
+  // ── Remove from index ──────────────────────────────────────
+  // id format is "entityType:entityId" (e.g. "user:user-123")
+  app.delete<{ Params: { entityType: string; entityId: string } }>(
+    '/search/index/:entityType/:entityId',
+    async (req, reply) => {
+      requireAdmin(req);
+      const productId = getRequestProductId(req);
+      const { entityType, entityId } = req.params;
+      const docId = `${entityType}:${entityId}`;
+
+      const removed = await repo.removeFromIndex(docId, productId);
+      if (!removed) {
+        // NOTE(review): a missing entry is reported as a bad request; a not-found
+        // error would describe it better if lib/errors provides one.
+        throw new BadRequestError('Entity not found in index');
+      }
+
+      req.log.info({ entityId, entityType }, 'Entity removed from index');
+      reply.status(204);
+      return;
+    }
+  );
+
+  // ── Index stats ────────────────────────────────────────────
+  app.get('/search/stats', async req => {
+    requireAdmin(req);
+    const productId = getRequestProductId(req);
+    return repo.getIndexStats(productId);
+  });
+
+  // ── Reindex request (admin trigger) ────────────────────────
+  app.post('/search/reindex', async req => {
+    const userId = requireAdmin(req);
+    const productId = getRequestProductId(req);
+
+    const parsed = ReindexRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      throw validationError(parsed.error.issues);
+    }
+    const input = parsed.data;
+
+    // In production, this would enqueue an async job.
+    // For MVP, only the request is logged and acknowledged; nothing is enqueued yet.
+    req.log.info({ productId, userId, entityTypes: input.entityTypes }, 'Reindex requested');
+
+    return {
+      status: 'accepted',
+      entityTypes: input.entityTypes,
+      message: 'Reindex job queued. Check /search/stats for progress.',
+    };
+  });
+}
diff --git a/services/platform-service/src/modules/search/search.test.ts b/services/platform-service/src/modules/search/search.test.ts
new file mode 100644
index 00000000..6119ac22
--- /dev/null
+++ b/services/platform-service/src/modules/search/search.test.ts
@@ -0,0 +1,141 @@
+/**
+ * Full-Text Search module — unit tests.
+ */
+
+import { describe, it, expect } from 'vitest';
+import { SearchQuerySchema, IndexEntitySchema, ReindexRequestSchema } from './types.js';
+
+/** Minimal structural view of a schema: all these tests need is `safeParse`. */
+type Parsable = { safeParse(input: unknown): { success: boolean } };
+
+/** Whether `schema` accepts `input`. */
+const accepts = (schema: Parsable, input: unknown): boolean => schema.safeParse(input).success;
+
+/** Smallest valid index payload; the rejection tests override one field at a time. */
+const baseEntity = {
+  entityId: 'x1',
+  entityType: 'user',
+  title: 'Test',
+  searchText: 'Test',
+  deepLink: '/test',
+};
+
+describe('SearchQuerySchema', () => {
+  it('validates minimal search query', () => {
+    const outcome = SearchQuerySchema.safeParse({ q: 'hello' });
+    expect(outcome.success).toBe(true);
+    if (!outcome.success) return;
+    // Pagination defaults are filled in by the schema.
+    expect(outcome.data.limit).toBe(20);
+    expect(outcome.data.offset).toBe(0);
+  });
+
+  it('validates with all options', () => {
+    const everything = {
+      q: 'search term',
+      entityTypes: ['user', 'item'],
+      tags: ['billing', 'support'],
+      from: '2026-01-01T00:00:00.000Z',
+      to: '2026-12-31T23:59:59.000Z',
+      limit: 50,
+      offset: 10,
+    };
+    const outcome = SearchQuerySchema.safeParse(everything);
+    expect(outcome.success).toBe(true);
+    if (!outcome.success) return;
+    expect(outcome.data.entityTypes).toEqual(['user', 'item']);
+    expect(outcome.data.limit).toBe(50);
+  });
+
+  it('rejects empty query', () => {
+    expect(accepts(SearchQuerySchema, { q: '' })).toBe(false);
+  });
+
+  it('rejects query over 500 chars', () => {
+    const tooLong = 'x'.repeat(501);
+    expect(accepts(SearchQuerySchema, { q: tooLong })).toBe(false);
+  });
+
+  it('rejects invalid entity type', () => {
+    expect(accepts(SearchQuerySchema, { q: 'test', entityTypes: ['invalid_type'] })).toBe(false);
+  });
+
+  it('rejects limit over 100', () => {
+    expect(accepts(SearchQuerySchema, { q: 'test', limit: 101 })).toBe(false);
+  });
+
+  it('rejects negative offset', () => {
+    expect(accepts(SearchQuerySchema, { q: 'test', offset: -1 })).toBe(false);
+  });
+});
+
+describe('IndexEntitySchema', () => {
+  it('validates minimal index entry', () => {
+    const outcome = IndexEntitySchema.safeParse({
+      entityId: 'user-123',
+      entityType: 'user',
+      title: 'John Doe',
+      searchText: 'John Doe john@example.com admin',
+      deepLink: '/users/user-123',
+    });
+    expect(outcome.success).toBe(true);
+    if (!outcome.success) return;
+    // Optional fields fall back to their schema defaults.
+    expect(outcome.data.tags).toEqual([]);
+    expect(outcome.data.isActive).toBe(true);
+  });
+
+  it('validates with tags', () => {
+    const tagged = {
+      entityId: 'item-456',
+      entityType: 'item',
+      title: 'Fix login bug',
+      searchText: 'Fix login bug authentication error password reset',
+      deepLink: '/items/item-456',
+      tags: ['bug', 'auth', 'p0'],
+      isActive: true,
+    };
+    expect(accepts(IndexEntitySchema, tagged)).toBe(true);
+  });
+
+  it('rejects empty entityId', () => {
+    const payload = { ...baseEntity, entityId: '', searchText: 'Test content' };
+    expect(accepts(IndexEntitySchema, payload)).toBe(false);
+  });
+
+  it('rejects invalid entityType', () => {
+    const payload = { ...baseEntity, entityType: 'invalid' };
+    expect(accepts(IndexEntitySchema, payload)).toBe(false);
+  });
+
+  it('rejects too many tags', () => {
+    // One more than the schema's maximum of 20 tags.
+    const tags = Array.from({ length: 21 }, (_, i) => `tag${i}`);
+    expect(accepts(IndexEntitySchema, { ...baseEntity, tags })).toBe(false);
+  });
+});
+
+describe('ReindexRequestSchema', () => {
+  it('validates with entity types', () => {
+    const request = { entityTypes: ['user', 'item', 'broadcast'] };
+    expect(accepts(ReindexRequestSchema, request)).toBe(true);
+  });
+
+  it('rejects empty entity types', () => {
+    expect(accepts(ReindexRequestSchema, { entityTypes: [] })).toBe(false);
+  });
+
+  it('rejects invalid entity type', () => {
+    expect(accepts(ReindexRequestSchema, { entityTypes: ['unknown'] })).toBe(false);
+  });
+});
diff --git a/services/platform-service/src/modules/search/types.ts b/services/platform-service/src/modules/search/types.ts
new file mode 100644
index 00000000..cb3487cf
--- /dev/null
+++ b/services/platform-service/src/modules/search/types.ts
@@ -0,0 +1,115 @@
+/**
+ * Full-Text Search module — types and schemas.
+ * Provides cross-entity search across platform data (users, items, notes, etc.).
+ * Uses Cosmos DB SQL queries with CONTAINS/LIKE for MVP, upgradable to
+ * Azure Cognitive Search or Elasticsearch later.
+ */
+
+import { z } from 'zod';
+
+// ── Searchable Entity Types ──────────────────────────────────────
+
+/**
+ * Single source of truth for the searchable entity types; both the
+ * `SearchEntityType` union and every schema below are derived from it.
+ */
+const SEARCH_ENTITY_TYPES = [
+  'user',
+  'item',
+  'broadcast',
+  'survey',
+  'changelog',
+  'audit_log',
+  'knowledge_article',
+] as const;
+
+export type SearchEntityType = (typeof SEARCH_ENTITY_TYPES)[number];
+
+const SearchEntityTypeSchema = z.enum(SEARCH_ENTITY_TYPES);
+
+export interface SearchResultDoc {
+  /** Entity ID */
+  entityId: string;
+  /** Entity type */
+  entityType: SearchEntityType;
+  /** Product scope */
+  productId: string;
+  /** Display title / name */
+  title: string;
+  /** Plain-text excerpt around the first match (no highlight markers are inserted) */
+  snippet: string;
+  /** Relevance score (0-1) */
+  score: number;
+  /** Deep link path within the product */
+  deepLink: string;
+  /** When the entity was last modified */
+  updatedAt: string;
+}
+
+export interface SearchIndexDoc {
+  /** Document id; the index route builds it as `entityType:entityId` */
+  id: string;
+  productId: string;
+  entityId: string;
+  entityType: SearchEntityType;
+  /** Searchable text (title + body + tags combined) */
+  searchText: string;
+  /** Display title */
+  title: string;
+  /** Deep link path */
+  deepLink: string;
+  /** Tags / categories for faceted search */
+  tags: string[];
+  /** Whether the entity is active / not deleted */
+  isActive: boolean;
+  indexedAt: string;
+  updatedAt: string;
+}
+
+export interface SearchSuggestionDoc {
+  id: string;
+  productId: string;
+  /** The suggestion text */
+  text: string;
+  /** How many times this query was searched */
+  queryCount: number;
+  /** How many times a result was clicked from this query */
+  clickCount: number;
+  updatedAt: string;
+}
+
+// ── Schemas ──────────────────────────────────────────────────────
+
+/** Query parameters of `GET /search`, after query-string values have been converted. */
+export const SearchQuerySchema = z.object({
+  q: z.string().min(1).max(500),
+  entityTypes: z.array(SearchEntityTypeSchema).optional(),
+  tags: z.array(z.string().max(64)).max(10).optional(),
+  from: z.string().datetime().optional(),
+  to: z.string().datetime().optional(),
+  limit: z.number().int().min(1).max(100).default(20),
+  offset: z.number().int().min(0).default(0),
+});
+
+/** Body of `POST /search/index`. */
+export const IndexEntitySchema = z.object({
+  // entityId becomes part of the Cosmos DB document id, which must not contain / \ ? #.
+  entityId: z
+    .string()
+    .min(1)
+    .regex(/^[^/\\?#]+$/, 'entityId must not contain /, \\, ? or #'),
+  entityType: SearchEntityTypeSchema,
+  title: z.string().min(1).max(500),
+  searchText: z.string().min(1).max(10000),
+  deepLink: z.string().min(1).max(512),
+  tags: z.array(z.string().max(64)).max(20).default([]),
+  isActive: z.boolean().default(true),
+});
+
+/** Body of `POST /search/reindex`; at least one entity type is required. */
+export const ReindexRequestSchema = z.object({
+  entityTypes: z.array(SearchEntityTypeSchema).min(1),
+});
+
+export type SearchQueryInput = z.infer<typeof SearchQuerySchema>;
+export type IndexEntityInput = z.infer<typeof IndexEntitySchema>;
+export type ReindexRequestInput = z.infer<typeof ReindexRequestSchema>;