feat(data): add seed bootstrap strategy

This commit is contained in:
Saravana Achu Mac 2026-05-05 13:45:43 -07:00
parent 1059b138d7
commit efcc5fa044
10 changed files with 237 additions and 4 deletions

View File

@ -136,3 +136,4 @@ Current baseline note: after common-platform workspace alignment, `pnpm install
- [`docs/MOBILE_PRODUCTION_BUILD_AND_SMOKE.md`](docs/MOBILE_PRODUCTION_BUILD_AND_SMOKE.md) — Expo build notes and iOS/Android smoke checklist
- [`docs/RELEASE_CHECKLIST.md`](docs/RELEASE_CHECKLIST.md) — Release notes template, deploy checklist, rollback, migrations, and monitoring placeholders
- [`docs/COSMOS_DATA_OPERATIONS.md`](docs/COSMOS_DATA_OPERATIONS.md) — Cosmos containers, indexes, retention, and backup/restore approach
- [`docs/SEED_BOOTSTRAP_STRATEGY.md`](docs/SEED_BOOTSTRAP_STRATEGY.md) — Built-in prompt, intake rule, onboarding workspace, and feature-flag bootstrap strategy

View File

@ -7,6 +7,7 @@
"type": "module",
"scripts": {
"dev": "tsx watch src/server.ts",
"bootstrap:seed": "tsx src/scripts/bootstrap-seed.ts",
"build": "tsc",
"start": "node dist/server.js",
"test": "vitest run",

View File

@ -0,0 +1,10 @@
import { describe, expect, it } from 'vitest';
import { DISPLAY_NAME, PRODUCT_ID, productConfig } from './product-config.js';
// Smoke test for the product-config module: the values it exports must match
// the NoteLett identity asserted below.
describe('product-config', () => {
it('loads the NoteLett product identity from the shared product manifest', () => {
// Two named exports and one field of the productConfig object are checked.
expect(PRODUCT_ID).toBe('notelett');
expect(DISPLAY_NAME).toBe('NoteLett');
expect(productConfig.domain).toBe('notelett.app');
});
});

View File

@ -1,6 +1,6 @@
import { loadProductIdentity } from '@bytelyst/config';
// Load the product identity from shared/product.json, addressed relative to this module's URL.
// NOTE(review): `URL#pathname` is percent-encoded and is not a native filesystem path on
// Windows; `fileURLToPath` from `node:url` would be the safer conversion — confirm what
// `loadProductIdentity` expects before changing it.
const identity = loadProductIdentity(new URL('../../shared/product.json', import.meta.url).pathname);
const identity = loadProductIdentity(new URL('../../../shared/product.json', import.meta.url).pathname);
// Expose the loaded identity object and its product id as named exports.
export const productConfig = identity;
export const PRODUCT_ID = identity.productId;

View File

@ -39,6 +39,17 @@ export async function deleteIntakeRule(id: string, userId: string): Promise<void
await rulesCollection().delete(id, userId);
}
/**
 * Write a built-in intake rule through the rules collection's `upsert`.
 *
 * `createdAt` and `updatedAt` are both set to the time of this call, so every
 * invocation sends fresh timestamps together with the supplied rule fields.
 *
 * @param rule - Rule fields without timestamps or storage metadata.
 * @returns The value the collection's `upsert` resolves with.
 */
export async function upsertBuiltinIntakeRule(
  rule: Omit<IntakeRuleDoc, 'createdAt' | 'updatedAt' | '_ts' | '_etag'>,
): Promise<IntakeRuleDoc> {
  const stampedAt = new Date().toISOString();
  const collection = rulesCollection();
  const seededRule = { ...rule, createdAt: stampedAt, updatedAt: stampedAt };
  return collection.upsert(seededRule);
}
// ── Intake Jobs ──────────────────────────────────────────────────
function jobsCollection() {

View File

@ -170,7 +170,7 @@ export async function listPromptTemplates(
/**
* Upsert a built-in prompt template (used by seed).
* Uses slug as the id for built-ins so they're idempotent.
* Built-ins carry deterministic ids so repeated seed runs are idempotent.
*/
export async function upsertBuiltinTemplate(
template: Omit<PromptTemplateDoc, 'createdAt' | 'updatedAt' | '_ts' | '_etag'>,

View File

@ -0,0 +1,59 @@
import { pathToFileURL } from 'node:url';
import { createLogger } from '@bytelyst/logger';
import { config } from '../lib/config.js';
import { initCosmosIfNeeded } from '../lib/cosmos-init.js';
import { initDatastore } from '../lib/datastore.js';
import { getAllFlags } from '../lib/feature-flags.js';
import { PRODUCT_ID } from '../lib/product-config.js';
import { getBuiltinIntakeRules } from '../modules/intake/seed-rules.js';
import { upsertBuiltinIntakeRule } from '../modules/intake/repository.js';
import { getBuiltinTemplates } from '../modules/note-prompts/seed.js';
import { upsertBuiltinTemplate } from '../modules/note-prompts/repository.js';
/** Summary returned by `runSeedBootstrap` and logged when the script is run directly. */
export interface SeedBootstrapResult {
/** Product identifier the bootstrap ran for (`PRODUCT_ID`). */
productId: string;
/** Number of built-in prompt templates passed to `upsertBuiltinTemplate`. */
promptTemplates: number;
/** Number of built-in intake rules passed to `upsertBuiltinIntakeRule`. */
intakeRules: number;
/** Number of keys in the object returned by `getAllFlags()`. */
featureFlags: number;
}
// Logger for seed bootstrap runs. The service name is the configured service
// name with a "-seed-bootstrap" suffix; dev mode is on for any NODE_ENV other
// than 'production'.
const seedLoggerService = `${config.SERVICE_NAME}-seed-bootstrap`;
const logger = createLogger({
  service: seedLoggerService,
  isDev: config.NODE_ENV !== 'production',
});
/**
 * Seed the built-in NoteLett data and report what was processed.
 *
 * Steps, in order: run `initCosmosIfNeeded`, run `initDatastore`, upsert every
 * built-in prompt template, upsert every built-in intake rule, then read the
 * feature flags to count them. Upserts are awaited one at a time, so a
 * rejected upsert stops the run and propagates to the caller.
 *
 * @returns The product id plus the number of templates, intake rules and
 *   feature flags seen during the run.
 */
export async function runSeedBootstrap(): Promise<SeedBootstrapResult> {
  // Storage setup comes first so the repositories below have a datastore.
  await initCosmosIfNeeded(logger);
  initDatastore();

  const builtinTemplates = getBuiltinTemplates();
  for (const builtinTemplate of builtinTemplates) {
    await upsertBuiltinTemplate(builtinTemplate);
  }

  const builtinRules = getBuiltinIntakeRules();
  for (const builtinRule of builtinRules) {
    await upsertBuiltinIntakeRule(builtinRule);
  }

  // Flags are only read here; the summary carries their count.
  const flagCount = Object.keys(getAllFlags()).length;

  const summary: SeedBootstrapResult = {
    productId: PRODUCT_ID,
    promptTemplates: builtinTemplates.length,
    intakeRules: builtinRules.length,
    featureFlags: flagCount,
  };
  return summary;
}
// Run the bootstrap only when this module is the script the process was
// started with: the file URL of argv[1] is compared with this module's URL.
// Importing the module from elsewhere therefore does not trigger a seed run.
const invokedPath = process.argv[1] ? pathToFileURL(process.argv[1]).href : null;
if (invokedPath === import.meta.url) {
  void (async () => {
    try {
      const result = await runSeedBootstrap();
      logger.info('Seed bootstrap complete', { ...result });
    } catch (err) {
      logger.error('Seed bootstrap failed', err);
      // Set the exit code instead of exiting so pending work can finish.
      process.exitCode = 1;
    }
  })();
}

View File

@ -108,8 +108,9 @@ Do not place secrets in `NEXT_PUBLIC_*` or `EXPO_PUBLIC_*` variables.
## Migration And Seed Checklist
- Built-in prompt templates are seeded once and idempotently.
- Default workspace/bootstrap behavior is deterministic.
- Run `pnpm run seed:bootstrap` with the production backend environment; see `docs/SEED_BOOTSTRAP_STRATEGY.md`.
- Built-in prompt templates and intake rules are seeded idempotently.
- Default workspace/bootstrap behavior is per-user and deterministic.
- Cosmos schema changes are backward compatible or have a documented backfill.
- Encrypted-field migrations have a dry-run and rollback note.
- Long-running backfills have owner, estimated duration, progress logs, and stop criteria.

View File

@ -0,0 +1,149 @@
# NoteLett Seed And Bootstrap Strategy
Date: May 5, 2026
Owner: NoteLett backend and release operators
Common platform: `../learning_ai/learning_ai_common_plat`
## Purpose
This document defines the deterministic seed and bootstrap strategy for production data that must exist before users depend on NoteLett workflows. It covers built-in prompt templates, intake rules, the per-user default workspace flow, and release feature-flag defaults.
## Bootstrap Command
Run the backend seed bootstrap after Cosmos containers are available and before production smoke checks:
```bash
zsh -lc 'source ~/.zshrc; export GITEA_NPM_TOKEN; pnpm run seed:bootstrap'
```
The command executes `backend/src/scripts/bootstrap-seed.ts`, which registers Cosmos containers through the common-platform `@bytelyst/cosmos` path (creating missing ones only when `COSMOS_AUTO_INIT=true`), initializes the shared datastore provider, and upserts the deterministic NoteLett built-ins.
For production, provide the same backend environment used by the deployed service:
```bash
NODE_ENV=production \
DB_PROVIDER=cosmos \
COSMOS_ENDPOINT=... \
COSMOS_KEY=... \
COSMOS_DATABASE=notelett-prod \
JWT_SECRET=... \
FIELD_ENCRYPTION_ENABLED=true \
FIELD_ENCRYPTION_KEY_PROVIDER=azure-key-vault \
pnpm run seed:bootstrap
```
`COSMOS_AUTO_INIT=true` is optional. Set it only when the operator intends the bootstrap command to create missing Cosmos containers; otherwise leave it unset and run the command after infrastructure provisioning has already created them.
## Built-In Prompt Templates
Source: `backend/src/modules/note-prompts/seed.ts`
Collection: `note_prompts`
Partition key: `/userId`
Sentinel owner: `userId = "__builtin__"`
Prompt templates are generated from `getBuiltinTemplates()` with:
- `productId = PRODUCT_ID`
- `id = "builtin-" + slug`
- `isBuiltin = true`
- stable slug/category/input/output fields
`pnpm run seed:bootstrap` calls `upsertBuiltinTemplate()` for every built-in template. The upsert is safe to run repeatedly: existing ids are replaced with the current source definition and new templates are inserted when added. User-created templates are not touched.
Runtime behavior:
- `GET /api/note-prompts?includeBuiltin=true` returns persisted built-ins plus user templates.
- `POST /api/note-prompts/run` can resolve built-ins by `templateId` from the `__builtin__` partition.
- Built-ins cannot be patched or deleted through the user API.
## Built-In Intake Rules
Source: `backend/src/modules/intake/seed-rules.ts`
Collection: `note_intake_rules`
Partition key: `/userId`
Sentinel owner: `userId = "__builtin__"`
Sentinel workspace: `workspaceId = "__all__"`
Intake rules are generated from `getBuiltinIntakeRules()` with deterministic ids such as `builtin-intake-youtube`, `builtin-intake-tweet`, and `builtin-intake-generic-5`. The bootstrap command persists these rules through `upsertBuiltinIntakeRule()`.
Runtime behavior:
- Intake matching reads user rules and persisted built-ins, then merges source-defined built-ins that are not yet persisted.
- User rules can override behavior with lower priority values or explicit `templateOverride` in the intake request.
- Built-in rules cannot be patched or deleted through the user API.
The request-time merge is a safety net for development and partial deploys. Production should still run `pnpm run seed:bootstrap` so operators can inspect built-in rule rows in Cosmos and validate them during release smoke.
## Default Workspace Bootstrap
Endpoint: `POST /api/workspaces/onboarding-seed`
Feature flag: `onboarding.seed_enabled`
Collections: `workspaces`, `notes`, `note_agent_actions`
The default workspace is intentionally per-user and is not created by the global bootstrap command. A signed-in writer calls the onboarding endpoint when they have no workspaces. The endpoint:
- rejects if `onboarding.seed_enabled` is false
- rejects when the user already has any workspace
- creates one "Getting started" workspace
- creates three sample notes
- creates one sample proposed agent action
- emits `workspace.onboarding_seeded`
This keeps global production deploys from creating user-owned documents without an authenticated user context.
## Feature Flags
Source: `backend/src/lib/feature-flags.ts`
Common platform owner: `platform-service` / `@bytelyst/backend-flags`
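The backend keeps local defaults for safe startup when `FEATURE_FLAGS_ENABLED=false` or the platform flag service is unavailable. Production release values should be managed in platform-service and reviewed before deploy. `pnpm run seed:bootstrap` only reads the local flag registry through `getAllFlags()` and reports the number of flags; it does not set flag values in platform-service.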
Initial production recommendation:
| Flag | Initial value | Reason |
| --- | --- | --- |
| `notes.enabled` | true | Core product flow |
| `workspaces.enabled` | true | Core product flow |
| `relationships.enabled` | true | Core product flow |
| `tasks.enabled` | true | Core product flow |
| `artifacts.enabled` | true | Core product flow |
| `mcp.enabled` | true | Agent integration |
| `search.hybrid_enabled` | true | Release search behavior |
| `copilot.enabled` | true | Existing copilot endpoint gate |
| `chat.rag_enabled` | true | Existing workspace chat gate |
| `onboarding.seed_enabled` | true | New-user workspace bootstrap |
| `notelett_duplicate_check_enabled` | true | Smart suggestion support |
| `notelett_suggest_links_enabled` | true | Smart suggestion support |
| `notelett_intake_enabled` | true | Release intake flow |
| `notelett_smart_actions_enabled` | false until smoke passes | LLM-dependent workflow |
| `notelett_auto_summarize_enabled` | false | Background LLM cost/risk |
| `notelett_auto_embed_enabled` | false | Background embedding cost/risk |
| `notelett_auto_link_enabled` | false | Background mutation risk |
| `notelett_copilot_llm_enabled` | false until LLM smoke passes | LLM-dependent workflow |
| `notelett_voice_capture_enabled` | false | Mobile capture gate |
| `notelett_scheduled_actions_enabled` | false until scheduler smoke passes | Background LLM/webhook risk |
| `notelett_webhooks_enabled` | false until webhook smoke passes | External callback risk |
| `notelett_collaborative_sharing_enabled` | false until sharing smoke passes | Cross-user access risk |
| `notelett_push_notifications_enabled` | false | Mobile notification gate |
## Release Order
1. Deploy or provision Cosmos containers from `backend/src/lib/cosmos-init.ts`.
2. Run `pnpm run seed:bootstrap` with production backend env.
3. Confirm the seed log reports the expected counts for prompt templates, intake rules, and feature flags.
4. Run platform flag smoke from `docs/PLATFORM_SMOKE_CHECKS.md`.
5. Set production feature-flag values in platform-service.
6. Run `pnpm run smoke:local` or the environment-specific backend smoke.
7. Sign in as a test user and call onboarding seed only for that test account.
## Verification
For code changes to seed/bootstrap behavior, run:
```bash
zsh -lc 'source ~/.zshrc; export GITEA_NPM_TOKEN; pnpm --filter @notelett/backend run typecheck'
zsh -lc 'source ~/.zshrc; export GITEA_NPM_TOKEN; pnpm --filter @notelett/backend exec vitest run src/modules/note-prompts/note-prompts.test.ts src/modules/intake/routes.test.ts src/modules/workspaces/routes.integration.test.ts'
zsh -lc 'source ~/.zshrc; export GITEA_NPM_TOKEN; DB_PROVIDER=memory pnpm run seed:bootstrap'
```
Record the seed output in the release notes. A healthy memory-mode bootstrap reports the current built-in prompt count, six intake rules, and the registered feature-flag count.

View File

@ -8,6 +8,7 @@
"build": "pnpm --filter @notelett/backend run build && pnpm --filter @notelett/web run build",
"smoke:local": "bash scripts/local-smoke.sh",
"smoke:compose": "bash scripts/compose-smoke.sh",
"seed:bootstrap": "pnpm --filter @notelett/backend run bootstrap:seed",
"audit:release-guards": "bash scripts/release-guard-audit.sh",
"dependency:health": "bash scripts/dependency-health.sh",
"verify": "pnpm run typecheck && pnpm run test && pnpm run build",