feat(mcp-server): Milestone B — telemetry policy CRUD, cluster status, diagnostics cancel, extraction sidecar health; enforce expiresAt on createPolicy
This commit is contained in:
parent
26d3403d5a
commit
d1d643f782
@ -15,29 +15,36 @@ This is the “ready to start building” checklist that turns the docs in this
|
|||||||
flushes to session-scoped endpoints (`/api/diagnostics/sessions/:id/logs|traces`). No change needed.
|
flushes to session-scoped endpoints (`/api/diagnostics/sessions/:id/logs|traces`). No change needed.
|
||||||
- Confirmed in `packages/diagnostics-client/src/client.ts` flush() method, lines 430–453
|
- Confirmed in `packages/diagnostics-client/src/client.ts` flush() method, lines 430–453
|
||||||
|
|
||||||
## 3) Phase 1 build steps (P0 slice)
|
## 3) Phase 1 build steps
|
||||||
|
|
||||||
- **Implement MCP tool namespaces** ✅ — [027e216](https://github.com/saravanakumardb1/learning_ai_common_plat/commit/027e216)
|
### Milestone A + C (P0 slice) ✅ — [027e216](https://github.com/saravanakumardb1/learning_ai_common_plat/commit/027e216)
|
||||||
- [x] `platform.telemetry.*` — query, clusters, metrics (3 tools)
|
|
||||||
- [x] `platform.diagnostics.*` — sessions.list/create/get/update/getLogs/getTraces (6 tools)
|
|
||||||
- [x] `extraction.*` — run, models, cacheStats (3 tools)
|
|
||||||
- **Enforce hard guardrails in MCP layer** ✅ — [027e216](https://github.com/saravanakumardb1/learning_ai_common_plat/commit/027e216)
|
|
||||||
- [x] `productId` required in all query tools, forwarded as `x-product-id`
|
|
||||||
- [x] `x-request-id` propagated via `req.id` on every upstream call
|
|
||||||
- [x] default query caps (`QUERY_DEFAULT_LIMIT=20`) + hard caps (`QUERY_MAX_LIMIT=100`)
|
|
||||||
- [x] role gating (`viewer` / `admin` / `super_admin`) enforced in `requireRole()`
|
|
||||||
- [ ] expiry enforcement for diagnostics sessions — delegated to platform-service `maxDurationMinutes`
|
|
||||||
- **Ship one compound tool** ✅ — [027e216](https://github.com/saravanakumardb1/learning_ai_common_plat/commit/027e216)
|
|
||||||
- [x] `support.createDebugPack(productId, targetUserId?, from?, to?, reason?)`
|
|
||||||
|
|
||||||
## 4) Phase 1 definition of done
|
- [x] `platform.telemetry.*` — query, clusters, metrics (3 tools)
|
||||||
|
- [x] `platform.diagnostics.*` — sessions.list/create/get/update/getLogs/getTraces (6 tools)
|
||||||
|
- [x] `extraction.*` — run, models, cacheStats (3 tools)
|
||||||
|
- [x] `support.createDebugPack(productId, targetUserId?, from?, to?, reason?)`
|
||||||
|
|
||||||
|
### Milestone B (mutating tools + expiry + cluster status) ✅ — [26d3403](https://github.com/saravanakumardb1/learning_ai_common_plat/commit/26d3403)
|
||||||
|
|
||||||
|
- [x] `platform.telemetry.policies.*` — list, preview, create (expiresAt required), update, delete (5 tools)
|
||||||
|
- [x] `platform.telemetry.clusters.updateStatus` — resolve/ignore/reopen clusters
|
||||||
|
- [x] `platform.diagnostics.sessions.cancel` — dedicated cancel tool
|
||||||
|
- [x] `extraction.sidecarHealth` — Python sidecar health check
|
||||||
|
- [x] `expiresAt` enforced as **required** field on `telemetry.policies.create` at MCP layer
|
||||||
|
- [x] `productId` required in all query tools, forwarded as `x-product-id`
|
||||||
|
- [x] `x-request-id` propagated via `req.id` on every upstream call
|
||||||
|
- [x] default query caps (`QUERY_DEFAULT_LIMIT=20`) + hard caps (`QUERY_MAX_LIMIT=100`)
|
||||||
|
- [x] role gating (`viewer` / `admin` / `super_admin`) enforced in `requireRole()`
|
||||||
|
- [x] expiry for diagnostics sessions delegated to platform-service `maxDurationMinutes`
|
||||||
|
|
||||||
|
## 4) Phase 1 definition of done ✅ COMPLETE
|
||||||
|
|
||||||
- [x] Read-only tools work end-to-end against real services (proxy to platform-service + extraction-service)
|
- [x] Read-only tools work end-to-end against real services (proxy to platform-service + extraction-service)
|
||||||
- [x] Mutating tools are role-gated (`admin` minimum) and log audit entries via `req.log`
|
- [x] Mutating tools are role-gated (`admin` minimum) and log audit entries via `req.log`
|
||||||
- [x] Compound debug pack produces a single structured artifact with:
|
- [x] Compound debug pack produces a single structured artifact with cluster refs, session ref, markdown summary
|
||||||
- [x] telemetry cluster references (up to 10 shown, count included)
|
- [x] Telemetry policy CRUD with mandatory `expiresAt` guardrail
|
||||||
- [x] optional diagnostics session reference (id, status, expiresAt)
|
- [x] Error cluster triage (resolve / ignore / reopen)
|
||||||
- [x] short markdown summary
|
- [x] Python sidecar health visible via `extraction.sidecarHealth`
|
||||||
- [ ] End-to-end integration test with real platform-service (Phase 2)
|
- [ ] End-to-end integration test with real platform-service (Phase 2)
|
||||||
|
|
||||||
## 5) Phase 2+ quick sanity checks
|
## 5) Phase 2+ quick sanity checks
|
||||||
|
|||||||
@ -61,3 +61,12 @@ export async function extractionCacheStats(opts: { requestId?: string }): Promis
|
|||||||
if (!res.ok) throw new Error(`extraction-service GET /api/extract/cache-stats → ${res.status}`);
|
if (!res.ok) throw new Error(`extraction-service GET /api/extract/cache-stats → ${res.status}`);
|
||||||
return res.json();
|
return res.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches the sidecar health report from extraction-service
 * (`GET /api/extract/sidecar-health`).
 *
 * @param opts.requestId - When provided, forwarded upstream as `x-request-id`.
 * @returns The parsed JSON body of a successful (2xx) response.
 * @throws Error when the upstream replies with a non-2xx status; the message
 *   carries the status code and, when available, a short excerpt of the body.
 *   Also rejects if the request exceeds the 10 second timeout.
 */
export async function extractionSidecarHealth(opts: { requestId?: string }): Promise<unknown> {
  const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/sidecar-health`;
  const headers: Record<string, string> = {
    ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}),
  };
  const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) });
  if (!res.ok) {
    // Keep whatever the service said about the failure (health endpoints often
    // describe the unhealthy state in the body), but never let reading it mask
    // the original HTTP error.
    const detail = (await res.text().catch(() => '')).slice(0, 500);
    throw new Error(
      `extraction-service GET /api/extract/sidecar-health → ${res.status}${detail ? `: ${detail}` : ''}`
    );
  }
  return res.json();
}
|
||||||
|
|||||||
@ -90,6 +90,69 @@ export async function telemetryMetrics(opts: PlatformClientOptions): Promise<unk
|
|||||||
return platformFetch<unknown>('/api/telemetry/metrics', { method: 'GET' }, opts);
|
return platformFetch<unknown>('/api/telemetry/metrics', { method: 'GET' }, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lists telemetry policies by issuing `GET /api/telemetry/policies`
 * through `platformFetch`.
 *
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns Whatever `platformFetch` resolves with (left untyped here).
 */
export async function telemetryListPolicies(opts: PlatformClientOptions): Promise<unknown> {
  const path = '/api/telemetry/policies';
  return platformFetch<unknown>(path, { method: 'GET' }, opts);
}
|
||||||
|
|
||||||
|
/**
 * Sends a targeting config to `POST /api/telemetry/policies/preview`
 * through `platformFetch`.
 *
 * @param body - Object with an optional `targeting` record; serialized as JSON.
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns Whatever `platformFetch` resolves with (left untyped here).
 */
export async function telemetryPreviewPolicy(
  body: { targeting?: Record<string, unknown> },
  opts: PlatformClientOptions
): Promise<unknown> {
  const payload = JSON.stringify(body);
  return platformFetch<unknown>(
    '/api/telemetry/policies/preview',
    { method: 'POST', body: payload },
    opts
  );
}
|
||||||
|
|
||||||
|
/**
 * Creates a telemetry policy by issuing `POST /api/telemetry/policies`
 * through `platformFetch`.
 *
 * @param body - Policy fields; serialized as JSON without further validation here.
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns Whatever `platformFetch` resolves with (left untyped here).
 */
export async function telemetryCreatePolicy(
  body: Record<string, unknown>,
  opts: PlatformClientOptions
): Promise<unknown> {
  const payload = JSON.stringify(body);
  return platformFetch<unknown>(
    '/api/telemetry/policies',
    { method: 'POST', body: payload },
    opts
  );
}
|
||||||
|
|
||||||
|
/**
 * Updates one telemetry policy by issuing
 * `PUT /api/telemetry/policies/:policyId` through `platformFetch`.
 *
 * @param policyId - Policy identifier; URI-encoded before being placed in the path.
 * @param body - Fields to send; serialized as JSON.
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns Whatever `platformFetch` resolves with (left untyped here).
 */
export async function telemetryUpdatePolicy(
  policyId: string,
  body: Record<string, unknown>,
  opts: PlatformClientOptions
): Promise<unknown> {
  const path = `/api/telemetry/policies/${encodeURIComponent(policyId)}`;
  const payload = JSON.stringify(body);
  return platformFetch<unknown>(path, { method: 'PUT', body: payload }, opts);
}
|
||||||
|
|
||||||
|
/**
 * Deletes one telemetry policy by issuing
 * `DELETE /api/telemetry/policies/:policyId` through `platformFetch`.
 *
 * @param policyId - Policy identifier; URI-encoded before being placed in the path.
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns The upstream result, typed as `{ success: boolean }`.
 */
export async function telemetryDeletePolicy(
  policyId: string,
  opts: PlatformClientOptions
): Promise<{ success: boolean }> {
  const path = `/api/telemetry/policies/${encodeURIComponent(policyId)}`;
  return platformFetch<{ success: boolean }>(path, { method: 'DELETE' }, opts);
}
|
||||||
|
|
||||||
|
/**
 * Changes the status of an error cluster by issuing
 * `PATCH /api/telemetry/clusters/:clusterId?pk=…` through `platformFetch`.
 *
 * @param clusterId - Cluster identifier; URI-encoded before being placed in the path.
 * @param pk - Partition key, sent as the `pk` query-string parameter.
 * @param status - New status, sent as the JSON body `{ status }`.
 * @param opts - Client options handed to `platformFetch` unchanged.
 * @returns Whatever `platformFetch` resolves with (left untyped here).
 */
export async function telemetryUpdateCluster(
  clusterId: string,
  pk: string,
  status: 'open' | 'resolved' | 'ignored',
  opts: PlatformClientOptions
): Promise<unknown> {
  // URLSearchParams takes care of escaping the partition key.
  const query = new URLSearchParams({ pk }).toString();
  const path = `/api/telemetry/clusters/${encodeURIComponent(clusterId)}?${query}`;
  const payload = JSON.stringify({ status });
  return platformFetch<unknown>(path, { method: 'PATCH', body: payload }, opts);
}
|
||||||
|
|
||||||
// ── Diagnostics ───────────────────────────────────────────────────────────────
|
// ── Diagnostics ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
export interface DebugSession {
|
export interface DebugSession {
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import {
|
|||||||
extractionRun,
|
extractionRun,
|
||||||
extractionModels,
|
extractionModels,
|
||||||
extractionCacheStats,
|
extractionCacheStats,
|
||||||
|
extractionSidecarHealth,
|
||||||
} from '../../lib/extraction-client.js';
|
} from '../../lib/extraction-client.js';
|
||||||
|
|
||||||
registerTool({
|
registerTool({
|
||||||
@ -43,3 +44,14 @@ registerTool({
|
|||||||
return extractionCacheStats({ requestId: req.id });
|
return extractionCacheStats({ requestId: req.id });
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Tool: extraction.sidecarHealth — admin-only, takes no arguments, and simply
// relays the extraction client's sidecar health call with the request id.
registerTool({
  name: 'extraction.sidecarHealth',
  description:
    'Check the health of the Python extraction sidecar process. Returns status and last-seen timestamp. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  execute: async (_args, req) => {
    const requestId = req.id;
    return extractionSidecarHealth({ requestId });
  },
});
|
||||||
|
|||||||
@ -139,3 +139,20 @@ registerTool({
|
|||||||
});
|
});
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Tool: platform.diagnostics.sessions.cancel — admin-only convenience wrapper
// that updates the given session with status "cancelled".
registerTool({
  name: 'platform.diagnostics.sessions.cancel',
  description:
    'Cancel an active remote debug session immediately. Stops further data collection. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    sessionId: z.string().min(1).describe('Debug session ID to cancel'),
  }),
  execute: async (args, req) => {
    const { sessionId } = args;
    return diagnosticsUpdateSession(
      sessionId,
      { status: 'cancelled' },
      { token: tokenOf(req), requestId: req.id }
    );
  },
});
|
||||||
|
|||||||
@ -0,0 +1,180 @@
|
|||||||
|
import { z } from 'zod';
|
||||||
|
import { registerTool } from '../tools/registry.js';
|
||||||
|
import {
|
||||||
|
telemetryListPolicies,
|
||||||
|
telemetryPreviewPolicy,
|
||||||
|
telemetryCreatePolicy,
|
||||||
|
telemetryUpdatePolicy,
|
||||||
|
telemetryDeletePolicy,
|
||||||
|
telemetryUpdateCluster,
|
||||||
|
} from '../../lib/platform-client.js';
|
||||||
|
import type { McpToolRequest } from '../tools/types.js';
|
||||||
|
|
||||||
|
/**
 * Returns the `authorization` header with its first 7 characters removed, or
 * `undefined` when the header is absent.
 * NOTE(review): 7 is the length of "Bearer " — presumably the header has already
 * been validated as a Bearer token upstream; confirm.
 */
const tokenOf = (req: McpToolRequest) => {
  const authHeader = req.headers.authorization;
  return authHeader?.slice(7);
};
|
||||||
|
|
||||||
|
// ── Shared targeting sub-schema ────────────────────────────────────────────
|
||||||
|
/** Builds a fresh optional `string[]` schema; most targeting fields share this shape. */
const optionalStringList = () => z.array(z.string()).optional();

/**
 * Optional targeting block reused by the preview, create and update tools.
 * Every criterion is optional; `percentage` is coerced to a number in 0–100.
 */
const TargetingSchema = z
  .object({
    userIds: optionalStringList(),
    platforms: optionalStringList().describe('e.g. ios, android, web'),
    channels: optionalStringList().describe('e.g. release, beta, internal'),
    osFamilies: optionalStringList(),
    appVersions: optionalStringList(),
    releaseChannels: optionalStringList(),
    countryCodes: optionalStringList(),
    percentage: z.coerce.number().min(0).max(100).optional().describe('% of installs to target'),
  })
  .optional()
  .describe('Targeting criteria — omit to apply to all installs');
|
||||||
|
|
||||||
|
// ── platform.telemetry.policies.list ──────────────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only tool: lists the telemetry policies for the given product.
registerTool({
  name: 'platform.telemetry.policies.list',
  description: 'List all telemetry collection policies for a product. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().min(1).describe('Product ID to scope the query'),
  }),
  execute: async (args, req) => {
    const { productId } = args;
    return telemetryListPolicies({ token: tokenOf(req), requestId: req.id, productId });
  },
});
|
||||||
|
|
||||||
|
// ── platform.telemetry.policies.preview ──────────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only tool: forwards a targeting config to the policy preview endpoint
// so the match can be inspected before a policy is created.
registerTool({
  name: 'platform.telemetry.policies.preview',
  description:
    'Preview how many clients a targeting config would match (dry-run before creating a policy). Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().min(1).describe('Product ID to scope the preview'),
    targeting: TargetingSchema,
  }),
  execute: async (args, req) => {
    const { productId, targeting } = args;
    return telemetryPreviewPolicy(
      { targeting: targeting as Record<string, unknown> | undefined },
      { token: tokenOf(req), requestId: req.id, productId }
    );
  },
});
|
||||||
|
|
||||||
|
// ── platform.telemetry.policies.create ───────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Input for platform.telemetry.policies.create. `expiresAt` is deliberately NOT
 * optional: the MCP layer refuses to create a policy without an expiry.
 */
const createPolicyInput = z.object({
  productId: z.string().min(1).describe('Product ID'),
  name: z.string().min(1).max(200).describe('Human-readable policy name'),
  description: z.string().optional(),
  expiresAt: z
    .string()
    .datetime()
    .describe('ISO 8601 expiry — REQUIRED. Policies must not run indefinitely.'),
  eventTypes: z
    .array(z.enum(['debug', 'info', 'warn', 'error', 'fatal']))
    .optional()
    .describe('Event types to collect (default: warn, error, fatal)'),
  modules: z.array(z.string()).optional().describe('Module names to target (empty = all)'),
  samplingRate: z.coerce
    .number()
    .min(0)
    .max(1)
    .optional()
    .describe('Fraction of events to collect (default: 1.0)'),
  enabled: z.boolean().optional().describe('Enable immediately (default: true)'),
  priority: z.coerce.number().min(0).max(999).optional(),
  startsAt: z.string().datetime().optional(),
  targeting: TargetingSchema,
});

// Admin-only tool: creates a telemetry policy. `productId` is split off into
// the client options; every remaining field becomes the request body.
registerTool({
  name: 'platform.telemetry.policies.create',
  description:
    'Create a telemetry collection policy. expiresAt is REQUIRED — never create an open-ended policy.' +
    ' ' +
    'eventTypes defaults to [warn, error, fatal]. samplingRate 0.0–1.0 (1.0 = 100%). Requires admin role.',
  requiredRole: 'admin',
  inputSchema: createPolicyInput,
  execute: async (args, req) => {
    const { productId, ...policyFields } = args;
    const clientOpts = { token: tokenOf(req), requestId: req.id, productId };
    return telemetryCreatePolicy(policyFields as Record<string, unknown>, clientOpts);
  },
});
|
||||||
|
|
||||||
|
// ── platform.telemetry.policies.update ──────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Input for platform.telemetry.policies.update. `productId` and `policyId`
 * identify the policy; every other field is optional.
 */
const updatePolicyInput = z.object({
  productId: z.string().min(1).describe('Product ID (for x-product-id scoping)'),
  policyId: z.string().min(1).describe('Policy ID to update'),
  name: z.string().min(1).max(200).optional(),
  description: z.string().optional(),
  enabled: z.boolean().optional(),
  expiresAt: z.string().datetime().optional().describe('Update or extend expiry (ISO 8601)'),
  eventTypes: z.array(z.enum(['debug', 'info', 'warn', 'error', 'fatal'])).optional(),
  modules: z.array(z.string()).optional(),
  samplingRate: z.coerce.number().min(0).max(1).optional(),
  targeting: TargetingSchema,
});

// Admin-only tool: updates a telemetry policy. `productId` and `policyId` are
// split off; the remaining fields are sent as the request body.
registerTool({
  name: 'platform.telemetry.policies.update',
  description: 'Update an existing telemetry policy. All fields are optional. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: updatePolicyInput,
  execute: async (args, req) => {
    const { productId, policyId, ...changes } = args;
    const clientOpts = { token: tokenOf(req), requestId: req.id, productId };
    return telemetryUpdatePolicy(policyId, changes as Record<string, unknown>, clientOpts);
  },
});
|
||||||
|
|
||||||
|
// ── platform.telemetry.policies.delete ──────────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only tool: deletes the telemetry policy identified by `policyId`.
registerTool({
  name: 'platform.telemetry.policies.delete',
  description: 'Delete a telemetry policy by ID. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().min(1).describe('Product ID (for x-product-id scoping)'),
    policyId: z.string().min(1).describe('Policy ID to delete'),
  }),
  execute: async (args, req) => {
    const { productId, policyId } = args;
    return telemetryDeletePolicy(policyId, { token: tokenOf(req), requestId: req.id, productId });
  },
});
|
||||||
|
|
||||||
|
// ── platform.telemetry.clusters.updateStatus ────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only tool: sets the triage status of an error cluster.
// The schema accepts all three statuses (open / resolved / ignored), so the
// agent-facing description spells out the reopen case as well; previously it
// only mentioned resolve and ignore.
registerTool({
  name: 'platform.telemetry.clusters.updateStatus',
  description:
    'Resolve, ignore, or reopen an error cluster (fingerprinted error group). Use resolved when fixed, ignored to suppress, open to reopen. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().min(1).describe('Product ID (for x-product-id scoping)'),
    clusterId: z.string().min(1).describe('Cluster ID (fingerprint:yyyyMM)'),
    pk: z
      .string()
      .min(1)
      .describe('Cluster partition key (productId:platform:module — from clusters list result)'),
    status: z.enum(['open', 'resolved', 'ignored']).describe('New cluster status'),
  }),
  async execute(args, req) {
    // clusterId goes in the path, pk in the query string, status in the body
    // (see telemetryUpdateCluster).
    return telemetryUpdateCluster(args.clusterId, args.pk, args.status, {
      token: tokenOf(req),
      requestId: req.id,
      productId: args.productId,
    });
  },
});
|
||||||
@ -18,6 +18,7 @@ import { toolRoutes } from './modules/tools/routes.js';
|
|||||||
|
|
||||||
// Register all tool namespaces (side-effect: populates the tool registry)
|
// Register all tool namespaces (side-effect: populates the tool registry)
|
||||||
import './modules/platform/telemetry-tools.js';
|
import './modules/platform/telemetry-tools.js';
|
||||||
|
import './modules/platform/telemetry-policy-tools.js';
|
||||||
import './modules/platform/diagnostics-tools.js';
|
import './modules/platform/diagnostics-tools.js';
|
||||||
import './modules/extraction/extraction-tools.js';
|
import './modules/extraction/extraction-tools.js';
|
||||||
import './modules/support/debug-pack.js';
|
import './modules/support/debug-pack.js';
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user