From 06aca239f4a8cda10d19c33c408d40bfd5432680 Mon Sep 17 00:00:00 2001 From: Saravana Achu Mac Date: Sat, 14 Feb 2026 00:23:37 -0800 Subject: [PATCH] chore(security): add secret scanning + playbook --- .gitignore | 7 ++ .husky/pre-commit | 3 + .husky/pre-push | 13 +++ AGENTS.md | 1 + AI.dev/SKILLS/security-auditing.md | 10 +-- .../CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md | 87 ++++++++++++++++++ scripts/secret-scan-repo.sh | 52 +++++++++++ scripts/secret-scan-staged.sh | 89 +++++++++++++++++++ scripts/setup-husky.sh | 23 ++++- 9 files changed, 279 insertions(+), 6 deletions(-) create mode 100755 .husky/pre-push create mode 100644 docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md create mode 100755 scripts/secret-scan-repo.sh create mode 100755 scripts/secret-scan-staged.sh diff --git a/.gitignore b/.gitignore index 87274346..2445854b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,12 @@ dist/ coverage/ .DS_Store *.tsbuildinfo + +# Env / Secrets .env .env.local +.env.*.local +*.pem +*.p12 +*.pfx +*.key diff --git a/.husky/pre-commit b/.husky/pre-commit index af68d84c..5e09735a 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -8,6 +8,9 @@ if [ "$HUSKY_ENABLED" = "false" ]; then exit 0 fi +echo "πŸ” Scanning staged changes for secrets..." +bash scripts/secret-scan-staged.sh + echo "🐢 Running pre-commit hooks for common platform..." # Run lint-staged on staged files diff --git a/.husky/pre-push b/.husky/pre-push new file mode 100755 index 00000000..5379408b --- /dev/null +++ b/.husky/pre-push @@ -0,0 +1,13 @@ +#!/usr/bin/env sh +. "$(dirname -- "$0")/_/husky.sh" + +# Check if Husky is disabled via environment variable +if [ "$HUSKY_ENABLED" = "false" ]; then + echo "⚠️ Husky disabled via HUSKY_ENABLED=false" + echo "πŸ’‘ To re-enable: unset HUSKY_ENABLED or export HUSKY_ENABLED=true" + exit 0 +fi + +echo "πŸ” Scanning tracked files for secrets before push..." 
+bash scripts/secret-scan-repo.sh + diff --git a/AGENTS.md b/AGENTS.md index c9d3a858..cbf3b60d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -98,6 +98,7 @@ learning_ai_common_plat/ - Never use `console.log` in production code — use `req.log` or `app.log` in Fastify - Never use `any` type — use Zod inference or explicit types - Never hardcode secrets or API keys +- Secret guardrails: Husky runs `scripts/secret-scan-staged.sh` (pre-commit) and `scripts/secret-scan-repo.sh` (pre-push). See `docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md`. - Never modify tests to make them pass — fix the actual code - Never delete existing comments or documentation unless explicitly asked - Never add emojis to code unless explicitly asked diff --git a/AI.dev/SKILLS/security-auditing.md b/AI.dev/SKILLS/security-auditing.md index 8039bfbb..96945541 100644 --- a/AI.dev/SKILLS/security-auditing.md +++ b/AI.dev/SKILLS/security-auditing.md @@ -269,11 +269,11 @@ AZURE_SPEECH_KEY= AZURE_OPENAI_KEY= # .env.local (gitignored) -COSMOS_ENDPOINT=https://your-cosmos.documents.azure.com:443/ -COSMOS_KEY=your-actual-key-here -JWT_SECRET=your-super-secret-jwt-key-32-chars -AZURE_SPEECH_KEY=your-speech-key -AZURE_OPENAI_KEY=your-openai-key +COSMOS_ENDPOINT=https://<your-cosmos>.documents.azure.com:443/ +COSMOS_KEY= +JWT_SECRET= +AZURE_SPEECH_KEY= +AZURE_OPENAI_KEY= ``` ### Git Hooks for Security diff --git a/docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md b/docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md new file mode 100644 index 00000000..618dc90d --- /dev/null +++ b/docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md @@ -0,0 +1,87 @@ +# Session Summary + Reusable Playbook (Common Platform) + +> **Audience:** Agents working on BytelystAI repos (MindLyst/LysnrAI/common-platform) who need a repeatable checklist. +> **Scope:** Secrets hygiene + repo guardrails (commit/push blockers) for `learning_ai_common_plat`.
+> **Source playbook:** `../learning_multimodal_memory_agents/docs/WINDSURF/CODEX_SESSION_SUMMARY_AND_PLAYBOOK.md` +> **Last updated:** 2026-02-14 + +--- + +## What We Did (This Repo) + +### 1. Added Guardrails So Secrets Don’t Land In Git Again + +Scripts: + +- Staged-diff scan (blocks commits): `scripts/secret-scan-staged.sh` +- Tracked-file scan (blocks pushes / manual checks): `scripts/secret-scan-repo.sh` + +Git hooks (Husky): + +- `.husky/pre-commit` now runs `scripts/secret-scan-staged.sh` and then `lint-staged` +- `.husky/pre-push` runs `scripts/secret-scan-repo.sh` + +Repo hygiene: + +- `.gitignore` updated to ignore `.env*` locals and common key/cert formats: `*.pem`, `*.p12`, `*.pfx`, `*.key` + +--- + +## Reusable Playbook (Apply To Other Repos) + +Use this as a checklist for a new repo or a repo that accidentally leaked secrets. + +### A. Secrets Hygiene (Do This First) + +- [ ] Inventory all secrets the repo uses (Cosmos, Storage, OpenAI, Speech, Notification Hub, App Insights, Stripe, etc.). +- [ ] Create/choose an Azure Key Vault per environment (`kv-<env>`). +- [ ] Pick canonical secret names (prefix by product): `mindlyst-*`, `lysnr-*`, etc. +- [ ] Move secret **values** into Key Vault. +- [ ] Remove secret **values** from: + - [ ] Markdown docs + - [ ] `.env*` files + - [ ] source code + - [ ] CI logs / README examples +- [ ] If a secret ever landed in git history: + - [ ] Treat it as compromised + - [ ] Rotate it (do not delay for “later cleanup”) + +### B. Guardrails (Prevent Regressions) + +- [ ] Add `.gitignore` entries: + - [ ] `.env`, `.env.local`, `.env.*.local` + - [ ] `*.pem`, `*.p12`, `*.pfx`, `*.key` +- [ ] Add staged secret scanning (commit blocker): + - [ ] `scripts/secret-scan-staged.sh` + - [ ] Hook it via Husky `.husky/pre-commit` (or another hooks system) +- [ ] Add tracked-file scanning (push blocker): + - [ ] `scripts/secret-scan-repo.sh` + - [ ] Hook it via `.husky/pre-push` + +### C. 
Basic Abuse Controls For Any LLM Routes (Denial-of-Wallet Protection)
+
+- [ ] Identify every route that calls an LLM provider (Azure OpenAI/OpenAI/etc.).
+- [ ] Add request body caps.
+- [ ] Add rate limiting (per-user preferred; fallback per-IP).
+- [ ] Add field-level guards (max message/content chars; max history length + total chars).
+- [ ] Document defaults + env knobs in a single doc.
+- [ ] For production / multi-instance: replace in-memory rate limiting with Redis/Upstash/platform-native limiting.
+
+### D. Beta Readiness Tracking
+
+- [ ] Create a single “go/no-go” checklist doc and keep it current:
+  - [ ] Verified checks (lint/build/tests, secret scan)
+  - [ ] Remaining blockers (auth, hosting, KV integration, monitoring, backups)
+
+---
+
+## Quick Commands (Local Agent Workflow)
+
+```bash
+# Secret scan (tracked files)
+bash scripts/secret-scan-repo.sh
+
+# Common platform (TS)
+pnpm test
+pnpm typecheck
+```
diff --git a/scripts/secret-scan-repo.sh b/scripts/secret-scan-repo.sh
new file mode 100755
index 00000000..efb9a5e7
--- /dev/null
+++ b/scripts/secret-scan-repo.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+#
+# Scans tracked files for common secret patterns.
+# Runs from the Husky pre-push hook; also suitable for manual use (or as part of quick-check).
+# Avoids printing matching lines.
+#
+# Note: This does not scan git history. Use a dedicated tool (e.g. gitleaks) for history scanning.
+
+set -euo pipefail
+
+# Outside a git work tree there is nothing to scan, so exit quietly.
+ROOT="$(git rev-parse --show-toplevel 2>/dev/null || true)"
+if [[ -z "${ROOT}" ]]; then
+  exit 0
+fi
+
+cd "${ROOT}"
+
+# Set to 1 by check() when a pattern matches or git grep itself fails.
+fail=0
+
+# check NAME PATTERN
+#   NAME     label printed above the list of matching files
+#   PATTERN  POSIX extended regular expression passed to git grep -E
+# Prints the tracked files that match PATTERN and sets fail=1;
+# prints nothing when no tracked file matches.
+check() {
+  local name="$1"
+  local pattern="$2"
+  local matches=""
+  local status=0
+
+  # -l prints only filenames (no secret material in output).
+  # -e marks the next argument as the pattern; without it a pattern that
+  # starts with "-" (the private key check) is parsed as an option.
+  matches="$(git grep -l -E -e "${pattern}" -- .)" || status=$?
+
+  # git grep exits 1 when nothing matches. Any higher status is treated as a
+  # scanner failure so that a broken scan cannot pass as a clean one.
+  if [[ "${status}" -gt 1 ]]; then
+    echo "${name}: git grep failed with exit status ${status}" >&2
+    fail=1
+    return 0
+  fi
+
+  if [[ -n "${matches}" ]]; then
+    echo "βœ— ${name}: potential matches found in:"
+    printf '%s\n' "${matches}" | sed 's/^/ - /'
+    echo
+    fail=1
+  fi
+}
+
+check "Private key blocks" '-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----'
+check "Azure storage AccountKey leaks" 'AccountKey=[A-Za-z0-9+/=_-]{20,}'
+check "Azure SharedAccessKey leaks" 'SharedAccessKey=[A-Za-z0-9+/=_-]{20,}'
+check "COSMOS_KEY assignment leaks" 'COSMOS_KEY[[:space:]]*=[[:space:]]*[A-Za-z0-9+/=_-]{20,}'
+check "AZURE_OPENAI_KEY assignment leaks" 'AZURE_OPENAI_KEY[[:space:]]*=[[:space:]]*[A-Za-z0-9+/=_-]{20,}'
+check "AZURE_SPEECH_KEY assignment leaks" 'AZURE_SPEECH_KEY[[:space:]]*=[[:space:]]*[A-Za-z0-9+/=_-]{20,}'
+check "JWT_SECRET hex-like assignments" 'JWT_SECRET[[:space:]]*=[[:space:]]*[0-9a-fA-F]{32,}'
+check "OpenAI API keys (sk-...)" 'sk-[A-Za-z0-9]{20,}'
+check "Stripe secret keys (sk_live_/sk_test_)" 'sk_(live|test)_[A-Za-z0-9]{20,}'
+check "Stripe webhook secrets (whsec_...)" 'whsec_[A-Za-z0-9]{20,}'
+check "Perplexity API keys (pplx-...)" 'pplx-[A-Za-z0-9]{20,}'
+check "AWS access key ids (AKIA...)" 'AKIA[0-9A-Z]{16}'
+# "-" goes last in the bracket expression: POSIX treats "\" there as a literal
+# character, so "\-_" would be a range that does not include the hyphen.
+check "Google API keys (AIza...)" 'AIza[0-9A-Za-z_-]{35}'
+
+if [[ "${fail}" -ne 0 ]]; then
+  echo "Secret scan failed."
+  echo "Fix the files above (move values to Key Vault / env vars) and retry."
+  exit 1
+fi
+
+echo "βœ“ Secret scan passed (tracked files)"
diff --git a/scripts/secret-scan-staged.sh b/scripts/secret-scan-staged.sh
new file mode 100755
index 00000000..23d935e3
--- /dev/null
+++ b/scripts/secret-scan-staged.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+#
+# Blocks commits that introduce obvious secrets (Azure keys, connection strings, private keys, etc.).
+# This scans only staged changes (the git index) to avoid false positives from unstaged work.
+#
+# Note: Avoid printing matched lines to keep secrets out of terminal scrollback/logs.
+
+set -euo pipefail
+
+ROOT="$(git rev-parse --show-toplevel 2>/dev/null || true)"
+if [[ -z "${ROOT}" ]]; then
+  exit 0
+fi
+
+cd "${ROOT}"
+
+# Nothing staged -> nothing to scan.
+if git diff --cached --quiet; then
+  exit 0
+fi
+
+# Pin the a/ and b/ prefixes and skip external diff drivers so user git config
+# (diff.noprefix, diff.mnemonicPrefix, diff.external) cannot change the format.
+DIFF="$(git diff --cached --no-color --no-ext-diff --unified=0 --src-prefix=a/ --dst-prefix=b/)"
+if [[ -z "${DIFF}" ]]; then
+  exit 0
+fi
+
+# Reads the diff on stdin; prints check and file names only; exits 1 on any hit.
+perl -ne '
+  use strict; use warnings;
+  our ($file, $in_hunk, %hits, @checks);
+  # Compile the patterns once instead of once per input line.
+  BEGIN {
+    @checks = (
+      ["PRIVATE_KEY_BLOCK", qr/-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/i],
+      # Azure connection strings / keys (high signal, low false positives)
+      ["AZURE_STORAGE_ACCOUNT_KEY", qr/AccountKey=[A-Za-z0-9+\/=_-]{20,}/],
+      ["AZURE_SHARED_ACCESS_KEY", qr/SharedAccessKey=[A-Za-z0-9+\/=_-]{20,}/],
+      # Common env var leaks
+      ["COSMOS_KEY_ASSIGNMENT", qr/\bCOSMOS_KEY\s*=\s*[A-Za-z0-9+\/=_-]{20,}/],
+      ["AZURE_OPENAI_KEY_ASSIGNMENT", qr/\bAZURE_OPENAI_KEY\s*=\s*[A-Za-z0-9+\/=_-]{20,}/],
+      ["AZURE_SPEECH_KEY_ASSIGNMENT", qr/\bAZURE_SPEECH_KEY\s*=\s*[A-Za-z0-9+\/=_-]{20,}/],
+      ["JWT_SECRET_HEX_ASSIGNMENT", qr/\bJWT_SECRET\s*=\s*[0-9a-fA-F]{32,}\b/],
+      # OpenAI / Stripe / Perplexity
+      ["OPENAI_API_KEY", qr/\bOPENAI_API_KEY\s*=\s*sk-[A-Za-z0-9]{20,}/],
+      ["OPENAI_KEY_LIKE", qr/\bsk-[A-Za-z0-9]{20,}\b/],
+      ["STRIPE_SECRET_KEY", qr/\bsk_(?:live|test)_[A-Za-z0-9]{20,}\b/],
+      ["STRIPE_WEBHOOK_SECRET", qr/\bwhsec_[A-Za-z0-9]{20,}\b/],
+      ["PERPLEXITY_API_KEY", qr/\bpplx-[A-Za-z0-9]{20,}\b/],
+      # Cloud provider patterns
+      ["AWS_ACCESS_KEY_ID", qr/\bAKIA[0-9A-Z]{16}\b/],
+      ["GOOGLE_API_KEY", qr/\bAIza[0-9A-Za-z\-_]{35}\b/],
+    );
+  }
+
+  # "+++" is a file header only between "diff --git" and the first "@@";
+  # inside a hunk every line that starts with "+" is added content.
+  if (/^diff --git /) { $in_hunk = 0; undef $file; next; }
+  if (/^\@\@ /) { $in_hunk = 1; next; }
+  if (!$in_hunk) {
+    # git may wrap the path in double quotes and may append a tab to it.
+    $file = $1 if /^\+\+\+ "?b\/(.*?)"?\t?$/;
+    next;
+  }
+  next unless /^\+(.*)$/;
+  my $line = $1;
+  # Fall back to a generic label so an unparsed path never skips the scan.
+  my $path = defined $file ? $file : "(unknown path)";
+
+  for my $check (@checks) {
+    my ($name, $re) = @$check;
+    $hits{$name}{$path} = 1 if $line =~ $re;
+  }
+
+  END {
+    if (%hits) {
+      print STDERR "βœ— Potential secrets detected in staged changes:\n";
+      for my $name (sort keys %hits) {
+        print STDERR " - ${name}: $_\n" for sort keys %{ $hits{$name} };
+      }
+      print STDERR "\nCommit aborted.\n";
+      print STDERR "Move secrets to Azure Key Vault (or env vars injected at deploy-time), then retry.\n";
+      print STDERR "If you believe this is a false positive, refactor the value into a placeholder.\n";
+      $? = 1;  # inside END, $? is the exit status perl reports
+    }
+  }
+' <<< "${DIFF}"
+
diff --git a/scripts/setup-husky.sh b/scripts/setup-husky.sh
index a03d0b88..99ecf3e6 100755
--- a/scripts/setup-husky.sh
+++ b/scripts/setup-husky.sh
@@ -22,16 +22,37 @@ if [ "$HUSKY_ENABLED" = "false" ]; then
   exit 0
 fi
 
+echo "πŸ” Scanning staged changes for secrets..."
+bash scripts/secret-scan-staged.sh
+
 echo "🐢 Running pre-commit hooks for common platform..."
 
 # Run lint-staged on staged files
-pnpm dlx lint-staged
+pnpm exec lint-staged
+EOF
+
+# Create pre-push hook with HUSKY_ENABLED flag
+cat > .husky/pre-push << 'EOF'
+#!/usr/bin/env sh
+. "$(dirname -- "$0")/_/husky.sh"
+
+# Check if Husky is disabled via environment variable
+if [ "$HUSKY_ENABLED" = "false" ]; then
+  echo "⚠️ Husky disabled via HUSKY_ENABLED=false"
+  echo "πŸ’‘ To re-enable: unset HUSKY_ENABLED or export HUSKY_ENABLED=true"
+  exit 0
+fi
+
+echo "πŸ” Scanning tracked files for secrets before push..."
+bash scripts/secret-scan-repo.sh
 EOF
 
 # Make the hook executable
 chmod +x .husky/pre-commit
+chmod +x .husky/pre-push
 
 echo "βœ… Pre-commit hook created successfully!"
+echo "βœ… Pre-push hook created successfully!"
 echo ""
 echo "πŸ“– Usage:"
 echo " Normal commit: git commit -m 'feat: add feature'"