#!/usr/bin/env bash
# check-rule-violations.sh — Ecosystem-wide AGENTS.md rule compliance scanner.
#
# Scans every repo in repos.txt for violations of the canonical rules defined
# in AI.dev/SKILLS/agent-behavior-guidelines.md (Parts B4, B5, B7) plus a few
# stack-specific per-repo MUST NOT rules (no `any`, no hardcoded hex colors).
#
# Output:
#   reports/rule-violations-YYYY-MM-DD.md   — human-readable, grouped by repo
#   reports/rule-violations-YYYY-MM-DD.json — machine-readable, one finding per line
#
# Usage:
#   bash scripts/check-rule-violations.sh            # scan all repos
#   bash scripts/check-rule-violations.sh <repo>     # scan single repo
#   bash scripts/check-rule-violations.sh --quiet    # only print summary
#
# Exit code: 0 for every completed scan (scanner is informational, not
# gating); 1 only when an unknown flag is passed.
#
# Each rule is implemented as a function:
#   scan_<rule>() { repo="$1"; repo_dir="$2"; ...emit findings via emit_finding... }
# Adding a new rule:
#   1. Add a function below
#   2. Append to RULES=() array
#   3. Document severity + rationale in the function header comment
#
# Findings are emitted via emit_finding which appends to two global arrays
# (FINDINGS_JSON and FINDINGS_MD) consumed at the end by the report writer.

set -uo pipefail
# NOT -e: grep returning 1 (no matches) is normal and must not abort the scan.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPOS_TXT="${SCRIPT_DIR}/../.windsurf/workflows/repos.txt"
BASE_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
REPORTS_DIR="${SCRIPT_DIR}/../reports"
TODAY="$(date +%Y-%m-%d)"
JSON_OUT="${REPORTS_DIR}/rule-violations-${TODAY}.json"
MD_OUT="${REPORTS_DIR}/rule-violations-${TODAY}.md"

RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'; BLUE='\033[0;34m'; NC='\033[0m'

# print_usage — print the leading comment header of this script (everything
# after the shebang up to the first non-comment line). Driven by content
# rather than fixed line numbers so it cannot drift into printing code.
print_usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
}

# load_repo_list FILE — append one repo name per usable line of FILE to the
# global REPOS array.
#   • Lines whose first non-blank character is '#' are comments.
#   • Blank / whitespace-only lines are skipped.
#   • Surrounding whitespace and a trailing CR (CRLF files) are stripped.
#   • A final line without a trailing newline is still read.
# A missing/unreadable FILE is reported on stderr and leaves REPOS unchanged
# (returns 1); the scan then simply has nothing to do.
load_repo_list() {
  local list_file="$1" line cr=$'\r'
  if [[ ! -r "$list_file" ]]; then
    echo "warning: repo list not readable: $list_file" >&2
    return 1
  fi
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%"$cr"}"
    # Trim leading, then trailing, whitespace.
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    [[ -z "$line" ]] && continue
    [[ "$line" == \#* ]] && continue
    REPOS+=("$line")
  done < "$list_file"
  return 0
}

# ─── Mode parsing ──────────────────────────────────────────────────────────────
QUIET=0
SINGLE_REPO=""
for arg in "$@"; do
  case "$arg" in
    --quiet|-q) QUIET=1 ;;
    --help|-h)  print_usage; exit 0 ;;
    -*)         echo "Unknown flag: $arg" >&2; exit 1 ;;
    *)          SINGLE_REPO="$arg" ;;
  esac
done

# Created only after flag validation so --help / bad flags have no
# filesystem side effects.
mkdir -p "$REPORTS_DIR"

# ─── Repo list ─────────────────────────────────────────────────────────────────
REPOS=()
if [[ -n "$SINGLE_REPO" ]]; then
  REPOS+=("$SINGLE_REPO")
else
  load_repo_list "$REPOS_TXT" || true
fi

# ─── Finding accumulator ───────────────────────────────────────────────────────
# Globals updated by emit_finding(); flushed at end of each repo.
declare -a FINDINGS_JSON=()
declare -a FINDINGS_MD=()

# Per-repo counters (reset before each repo). Bash 3.2 compatible — no
# associative arrays. Ecosystem totals are computed at the end by grep'ing
# the JSONL output file (each finding is one line).
REPO_CRITICAL=0
REPO_MAJOR=0
REPO_MINOR=0

# _json_escape STRING — escape STRING for embedding inside a JSON string
# literal. The result is returned in the global REPLY (no subshell, so it is
# cheap to call once per finding).
#   \  → \\      "  → \"      TAB → \t      CR → \r
# Any other control character is dropped: JSON forbids raw control characters
# inside strings, and CRLF source files would otherwise leak a raw CR.
_json_escape() {
  local s="$1" tab=$'\t' cr=$'\r'
  s="${s//\\/\\\\}"   # must run first so later escapes are not double-escaped
  s="${s//\"/\\\"}"
  s="${s//$tab/\\t}"
  s="${s//$cr/\\r}"
  s="${s//[[:cntrl:]]/}"
  REPLY="$s"
}

# emit_finding RULE_ID SEVERITY REPO FILE LINE EVIDENCE
#   SEVERITY: critical|major|minor
#
# Records one finding:
#   • appends a JSON object (one line) to FINDINGS_JSON
#   • appends a markdown bullet to FINDINGS_MD
#   • bumps REPO_CRITICAL / REPO_MAJOR / REPO_MINOR for the given severity
#     (any other severity is recorded but not counted)
# FILE is reported relative to ${BASE_DIR}/${REPO}/ when it lives under it.
emit_finding() {
  local rule_id="$1" severity="$2" repo="$3" file="$4" line="$5" evidence="$6"

  # Strip leading repo path for shorter reporting. The prefix is quoted so
  # glob characters in the path are matched literally.
  local rel_file="${file#"${BASE_DIR}/${repo}/"}"

  # "line" is emitted as a bare JSON number; fall back to 0 if the caller's
  # grep-output parsing handed us something non-numeric (e.g. a path
  # containing ':'), otherwise the whole JSONL line would be invalid.
  local json_line="$line"
  [[ "$json_line" =~ ^[0-9]+$ ]] || json_line=0

  local json_file json_evidence
  _json_escape "$rel_file"; json_file="$REPLY"
  _json_escape "$evidence"; json_evidence="$REPLY"

  FINDINGS_JSON+=("{\"rule\":\"$rule_id\",\"severity\":\"$severity\",\"repo\":\"$repo\",\"file\":\"$json_file\",\"line\":$json_line,\"evidence\":\"$json_evidence\"}")
  FINDINGS_MD+=("- **[$severity]** \`$rel_file:$line\` — $evidence")

  case "$severity" in
    critical) REPO_CRITICAL=$(( REPO_CRITICAL + 1 )) ;;
    major)    REPO_MAJOR=$(( REPO_MAJOR + 1 )) ;;
    minor)    REPO_MINOR=$(( REPO_MINOR + 1 )) ;;
  esac
}

# ─── Shared exclusion globs (grep --exclude-dir / --exclude) ───────────────────
#
# These directories are always excluded from scans. They contain either
# generated, vendored, or test code (tests have a separate exclusion policy
# per rule — tests are sacred but they don't violate prod-code rules).
EXCLUDE_DIRS=(
  --exclude-dir=node_modules
  --exclude-dir=.git
  --exclude-dir=.next
  --exclude-dir=.turbo
  --exclude-dir=dist
  --exclude-dir=build
  --exclude-dir=coverage
  --exclude-dir=__pycache__
  --exclude-dir=.venv
  --exclude-dir=venv
  --exclude-dir=target            # Rust
  --exclude-dir=Pods              # CocoaPods
  --exclude-dir=DerivedData       # Xcode
  --exclude-dir=.gradle
  --exclude-dir=reports
  --exclude-dir=test-results
  --exclude-dir=playwright-report
  --exclude-dir=.pytest_cache
  --exclude-dir=.ruff_cache
  --exclude-dir=generated         # design-tokens output
  --exclude-dir=__LOCAL_LLMs      # local LLM sandbox (not production)
  --exclude-dir=chat-history      # IDE chat archives
  --exclude-dir=__experiments     # experimental code
  --exclude-dir=experiments       # experimental code
  --exclude-dir=_archive_helper   # archived code
  --exclude-dir=.docker-deps      # bundled npm tarballs for Docker builds
)

# Test-file exclusions (used per-rule; some rules apply to tests, others don't).
# Glob patterns are quoted so the shell never expands them against the cwd.
TEST_EXCLUDES=(
  --exclude-dir=__tests__
  --exclude-dir=__mocks__
  --exclude-dir=e2e
  --exclude-dir=tests
  '--exclude=*.test.ts'
  '--exclude=*.test.tsx'
  '--exclude=*.spec.ts'
  '--exclude=*.spec.tsx'
  '--exclude=*.test.js'
  '--exclude=*.spec.js'
)

# ─── RULES ─────────────────────────────────────────────────────────────────────
#
# Each scan_* function:
#   • Receives $1 = repo name, $2 = absolute repo path
#   • Emits findings via emit_finding
#   • Must not error out (use || true after grep when needed)

# _b4_console_log_suppressed FILE LINE
# Returns 0 when an ESLint directive switches `no-console` off for LINE:
#   • `eslint-disable-line ... no-console` on LINE itself
#   • `eslint-disable-next-line ... no-console` on LINE-1 (and only LINE-1)
#   • a block-level `eslint-disable ... no-console` within the 30 lines
#     before LINE that has not been cancelled by a later
#     `eslint-enable ... no-console` in that same window
# The window keeps the scan cheap; a block disable further than 30 lines
# above the call is not seen. One awk process does the whole check, which
# also avoids `producer | grep -q` pipelines whose early exit can surface as
# a pipeline failure under `set -o pipefail`.
_b4_console_log_suppressed() {
  local file="$1" line="$2"
  local start_line=$(( line - 30 ))
  (( start_line < 1 )) && start_line=1
  awk -v s="$start_line" -v e="$line" '
    NR < s { next }
    NR > e { exit }
    NR == e - 1 && /eslint-disable-next-line[^*]*no-console/ { next_line = 1 }
    NR == e     && /eslint-disable-line[^*]*no-console/      { same_line = 1 }
    /eslint-enable[^*]*no-console/                           { block = 0 }
    /eslint-disable([^-]|$)[^*]*no-console/                  { block = 1 }
    END { exit ((block || next_line || same_line) ? 0 : 1) }
  ' "$file" 2>/dev/null
}

# B4 — no `console.log` in production TS/JS code.
# Severity: major. Tests excluded. Allowed: console.warn/error for legit errors
# in some places, but we still flag console.log to surface.
#   $1 = repo name, $2 = absolute repo path
scan_b4_console_log() {
  local repo="$1" repo_dir="$2"
  local file line evidence
  local re_comment='^[[:space:]]*(//|#|\*)'

  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    # A path containing ':' shifts the fields; a non-numeric line number
    # would break the arithmetic below, so such rows are skipped.
    [[ "$line" =~ ^[0-9]+$ ]] || continue

    # Skip commented lines (// or # or * before console.log).
    [[ "$evidence" =~ $re_comment ]] && continue

    # Skip CLI/admin scripts — their job is to print to terminal.
    # Pattern: top-level scripts/ directory in either common_plat or product repos.
    [[ "$file" =~ (^|/)scripts/ ]] && continue
    # Skip scaffolder / generator CLIs (packages/create-app/).
    [[ "$file" =~ /packages/create-app/ ]] && continue
    # Skip standalone monitoring/health-check scripts (services/monitoring/).
    [[ "$file" =~ /services/monitoring/ ]] && continue
    # Skip plugins/ — Tauri / Expo / Cowork plugins are CLI-like build tools
    # that emit progress to stdout when invoked by their host runtime.
    [[ "$file" =~ (^|/)plugins/ ]] && continue
    # (TODO-3 resolved as of mcp-client commit: package now exposes a
    # `logger` callback on McpClient and uses `this.log.*` everywhere,
    # so no special-case exemption is needed for the console-log rule.)
    # Skip the logger package itself (packages/logger) — console.log IS the
    # implementation when the user-provided logger is not configured.
    [[ "$file" =~ /packages/logger/ ]] && continue

    # Honor ESLint `no-console` opt-outs (block, next-line, same-line).
    _b4_console_log_suppressed "$file" "$line" && continue

    emit_finding "b4-console-log" "major" "$repo" "$file" "$line" "console.log: ${evidence:0:80}"
  done < <(grep -rnE \
      --include='*.ts' --include='*.tsx' --include='*.js' --include='*.jsx' --include='*.mjs' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" \
      -e 'console\.log\(' -- "$repo_dir" 2>/dev/null || true)
}

# B4 — no `print(` in Swift production code.
# Severity: major. XCTest test files excluded.
#   $1 = repo name, $2 = absolute repo path
scan_b4_swift_print() {
  local repo="$1" repo_dir="$2"
  local file line evidence
  local re_comment='^[[:space:]]*//'

  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    [[ "$evidence" =~ $re_comment ]] && continue
    emit_finding "b4-swift-print" "major" "$repo" "$file" "$line" "Swift print(): ${evidence:0:80}"
  done < <(grep -rnE \
      --include='*.swift' \
      --exclude-dir=Tests --exclude='*Tests.swift' --exclude='*Test.swift' \
      "${EXCLUDE_DIRS[@]}" \
      -e '\bprint\(' -- "$repo_dir" 2>/dev/null || true)
}

# _prev_line_matches FILE LINE REGEX
# Returns 0 when the line immediately before LINE in FILE matches the ERE
# REGEX. Returns 1 for LINE <= 1, a non-numeric LINE (grep output whose path
# contained ':'), or an unreadable file. The line is captured into a variable
# and matched in-shell, so there is no `| grep -q` pipeline whose early exit
# could be reported as a failure under `set -o pipefail`.
_prev_line_matches() {
  local file="$1" line="$2" re="$3" prev
  [[ "$line" =~ ^[0-9]+$ ]] || return 1
  (( line > 1 )) || return 1
  prev="$(sed -n "$(( line - 1 ))p" "$file" 2>/dev/null)"
  [[ "$prev" =~ $re ]]
}

# B4 — no `print(` in Python production code under src/ tools/ backend-python/
# backend/src/.
# Severity: major. Scripts dir and tests excluded (scripts and CLIs may print).
#   $1 = repo name, $2 = absolute repo path
scan_b4_python_print() {
  local repo="$1" repo_dir="$2"

  # Skip mac_tooling — standalone macOS forensics CLI per its AGENTS.md.
  # tools/*.py all have `if __name__ == "__main__":` and are invoked
  # directly as CLI scripts. Checked once up front: it does not depend on
  # the individual finding.
  [[ "$repo" == "learning_ai_mac_tooling" ]] && return 0

  local src file line evidence
  local re_comment='^[[:space:]]*#'
  local re_cli='/(cli|__main__|main)\.py$'
  # `# noqa: T201` (flake8 / ruff's "print found" rule); the same pattern is
  # used for the inline and the preceding-line check.
  local re_noqa='#[[:space:]]*noqa[[:space:]]*:[^#]*T201'

  # Note: scripts/ explicitly excluded (CLIs print to terminal). Only scan
  # core src/library/backend trees where structlog/logging should be used.
  for src in src tools backend-python backend/src; do
    [[ -d "${repo_dir}/${src}" ]] || continue
    while IFS=: read -r file line evidence; do
      [[ -z "$file" ]] && continue
      [[ "$evidence" =~ $re_comment ]] && continue
      # Skip CLI entrypoint files (often named cli.py, __main__.py).
      [[ "$file" =~ $re_cli ]] && continue
      # Honor `# noqa: T201`, either inline or on the preceding line.
      [[ "$evidence" =~ $re_noqa ]] && continue
      _prev_line_matches "$file" "$line" "$re_noqa" && continue
      emit_finding "b4-python-print" "major" "$repo" "$file" "$line" "Python print(): ${evidence:0:80}"
    done < <(grep -rnE \
        --include='*.py' \
        --exclude-dir=tests --exclude='test_*.py' --exclude='*_test.py' \
        "${EXCLUDE_DIRS[@]}" \
        -e '^\s*print\(' -- "${repo_dir}/${src}" 2>/dev/null || true)
  done
}

# B4 — TypeScript `any` type usage in source code.
# Severity: minor (per AGENTS.md it's a MUST NOT for most TS repos but is
# pervasive in some legacy code; flagging as minor avoids drowning the report).
# Excludes: tests, .d.ts files, generated code, lines that are pre-existing
# `// eslint-disable` annotated.
#   $1 = repo name, $2 = absolute repo path
scan_ts_any_type() {
  local repo="$1" repo_dir="$2"

  # Skip mac_tooling — standalone forensics toolkit; not ByteLyst-style TS
  # (matches hex / python-print exemptions per its own AGENTS.md).
  [[ "$repo" == "learning_ai_mac_tooling" ]] && return 0

  local file line evidence
  local re_prev_optout='(eslint-disable-next-line[^*]*no-explicit-any|@ts-ignore|@ts-expect-error)'
  local re_comment='^[[:space:]]*(\*|//)'
  local re_catch='catch[[:space:]]*\([^)]*:[[:space:]]*any\)'
  # "`:any` is prose, not a type": the text after `any` is a quote or `<`
  # (closing a string / JSX text node), or whitespace followed by another
  # word / quote / `<`. A bare space is deliberately NOT enough — real
  # annotations such as `let x: any = 1` or `x: any | null` have a space
  # followed by punctuation and must still be flagged.
  local re_text="[a-zA-Z0-9]:[[:space:]]*any(['\"<]|[[:space:]]+[a-zA-Z0-9'\"<])"

  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    [[ "$evidence" =~ eslint-disable ]] && continue
    # MCP client SDK still uses `any` for JSON-RPC payloads at the
    # untrusted-server boundary. Keep exempt for ts-any-type (this is a
    # separate concern from the logger refactor in TODO-3).
    [[ "$file" =~ /packages/mcp-client/ ]] && continue
    # Skip lines preceded by `// eslint-disable-next-line @typescript-eslint/no-explicit-any`
    # or the broader `// @ts-ignore` / `// @ts-expect-error` opt-outs.
    _prev_line_matches "$file" "$line" "$re_prev_optout" && continue
    # Skip docstring/JSDoc continuation lines (mentions of "any" in prose).
    [[ "$evidence" =~ $re_comment ]] && continue
    # Skip the very common `catch (e: any)` / `catch (err: any)` / `catch (error: any)`
    # pattern. TypeScript 4.4+ defaults caught errors to `unknown`, so this is
    # an explicit author choice to skip narrowing. Code style only, not a bug.
    [[ "$evidence" =~ $re_catch ]] && continue
    # Skip false positives where `:any` is text inside a string literal or JSX
    # text content (e.g. `label: 'Energy: any'`, `owner:any</td>`).
    [[ "$evidence" =~ $re_text ]] && continue
    emit_finding "ts-any-type" "minor" "$repo" "$file" "$line" "any type: ${evidence:0:80}"
  done < <(grep -rnE \
      --include='*.ts' --include='*.tsx' \
      --exclude='*.d.ts' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" \
      -e ':\s*any\b|\bas\s+any\b' -- "$repo_dir" 2>/dev/null || true)
}

# Web-specific: hardcoded hex colors in TS/TSX/CSS source code.
# Per-repo AGENTS.md MUST NOT — colors must come from design-tokens
# (--XX-* CSS custom properties).
# Severity: major. Excludes: design-tokens output, tests, comments.
# Evidence: the actual matched hex code (e.g., "#fff"), so triage doesn't need
# to open the file for trivial cases.
#   $1 = repo name, $2 = absolute repo path
scan_web_hardcoded_hex() {
  local repo="$1" repo_dir="$2"

  # mac_tooling: standalone macOS forensics toolkit, not a ByteLyst product.
  # No design token system; uses Tailwind palette literals for data
  # visualization (DataFlowMap risk colors, scrollbar slate values). Exempt
  # per its own AGENTS.md "Differences from ByteLyst Product Repos" section.
  [[ "$repo" == "learning_ai_mac_tooling" ]] && return 0

  local hex_re='#[0-9a-fA-F]{6}\b|#[0-9a-fA-F]{3}\b'
  local nl=$'\n'

  # File-path exemptions: a finding is dropped when its path matches any of
  # these EREs.
  local -a exempt_path_res=(
    # DESIGN TOKEN DEFINITION files — the canonical places where colors are
    # declared as CSS custom properties or design system tokens.
    '(^|/)(globals\.css|tokens\.css|tailwind\.config\.(ts|js|cjs|mjs)|.*\.tokens\..*|.*Theme\.(ts|tsx|swift|kt))$'
    '/(generated|design-tokens|design-system)/'
    # Theme source files declare token values — entire /theme/ directory exempt.
    '/theme/.+\.(ts|tsx|js)$'
    # Next.js API routes are server-side endpoints, not UI styling.
    '/app/api/.+\.(ts|tsx)$'
    # Domain-data visualization files (organ-data, molecule-flows, etc.)
    # in pure-TS engine layer src/lib/. Colors are data, not styling.
    '/src/lib/[a-zA-Z0-9_-]+-(data|flows|palette)\.(ts|tsx)$'
    # Backend code is not a UI styling layer. Hex values in backend modules
    # are data (e.g., theme presets, zone colors stored in Cosmos).
    '(^|/)backend/'
    # Same for platform/extraction/mcp services (Fastify backends in common_plat).
    '/services/[^/]+/src/'
    # Config / schema packages declare default theme values for products to
    # consume. Hex values there are schema defaults, not UI styling.
    '/packages/config/'
    # Storybook stories and previews are documentation/demo, not production UI.
    '(\.storybook/|/stories/|\.stories\.(ts|tsx)$)'
    # Scaffolding templates contain example colors substituted at gen time.
    '/packages/create-app/src/lib/templates'
    # devops package (internal dev tooling, not a product UI).
    '/packages/devops/'
    # Tool pages where hex is the demo content being manipulated, not styling.
    '/tools/(color-picker|markdown-preview|qr-code|image-to-base64|regex-tester)/'
    # Theme editor / theme-defaults files — their content IS hex values.
    '/(ThemeEditor|theme-defaults)\.(ts|tsx)'
    '/api/themes/'
  )

  # Line-content exemptions: a finding is dropped when the matched source
  # line matches any of these EREs.
  local -a exempt_content_res=(
    # CSS custom property DEFINITIONS (anything from "  --xxx:" onward,
    # including gradient values that embed multiple hex codes).
    '^[[:space:]]*--[a-zA-Z0-9-]+:'
    # CSS attribute selectors like [stroke='#ccc'] / [fill="#fff"] — these
    # are SELECTORS matching rendered attributes, not styling declarations.
    "\\[(stroke|fill|color)=['\"]#"
    # var(--token, #fallback) — defensive fallbacks for the design-token
    # loading order, not raw hardcodes.
    'var\(--[a-zA-Z0-9_-]+'
    # Comment lines (// or /* or *).
    '^[[:space:]]*(//|\*|/\*)'
    # Next.js PWA themeColor metadata (must be literal hex per spec).
    'themeColor[[:space:]]*:'
    # SVG fill/stroke attributes with hex values — typically brand-mandated
    # colors (Google "G" logo, Microsoft, Apple) that must be literal hex.
    '(fill|stroke)="#[0-9a-fA-F]+"'
    # HTML numeric character references like &#128196; — these encode
    # Unicode characters, NOT hex colors (the digits happen to be in [0-9],
    # a subset of hex, fooling the regex).
    '&#[0-9]+;'
  )

  local file line content re skip match
  # Result of the per-file opt-out lookup is cached: grep emits all matches
  # of one file consecutively, so the file is inspected once, not per match.
  local marker_file="" marker_exempt=0

  # Strategy: grep returns full line (so we can examine context), then we
  # extract the hex match from each result and apply context filters.
  while IFS=: read -r file line content; do
    [[ -z "$file" ]] && continue

    skip=0
    for re in "${exempt_path_res[@]}"; do
      if [[ "$file" =~ $re ]]; then skip=1; break; fi
    done
    (( skip )) && continue

    for re in "${exempt_content_res[@]}"; do
      if [[ "$content" =~ $re ]]; then skip=1; break; fi
    done
    (( skip )) && continue

    # File-level opt-out: any file containing the marker AGENTS-SCAN-EXEMPT-HEX
    # in its first 50 lines is exempt. This is for legitimate visualization-data
    # files that inline canvas-draw functions tightly coupled to per-organ /
    # per-stage hex colors (where extraction would break draw-function closure
    # semantics). The marker MUST be paired with a referenced TODO-N comment so
    # the exception is tracked.
    # Done with a single awk instead of `head | grep -q`: under pipefail the
    # early exit of `grep -q` can make the pipeline report failure even
    # though the marker was found.
    if [[ "$file" != "$marker_file" ]]; then
      marker_file="$file"
      if awk 'NR > 50 { exit } /AGENTS-SCAN-EXEMPT-HEX/ { found = 1; exit } END { exit (found ? 0 : 1) }' \
          "$file" 2>/dev/null; then
        marker_exempt=1
      else
        marker_exempt=0
      fi
    fi
    (( marker_exempt )) && continue

    # Extract just the first hex match for evidence.
    match="$(grep -oE -e "$hex_re" <<<"$content")"
    match="${match%%"$nl"*}"
    [[ -z "$match" ]] && continue

    emit_finding "web-hardcoded-hex" "major" "$repo" "$file" "$line" "Hardcoded hex color: $match"
  done < <(grep -rnE \
      --include='*.ts' --include='*.tsx' --include='*.css' --include='*.scss' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" \
      -e "$hex_re" -- "$repo_dir" 2>/dev/null || true)
}

# B5 — hardcoded product ID string literals outside shared/product.json
# and product-config files.
# Severity: critical. The canonical pattern is PRODUCT_ID from product-config.ts
# or @bytelyst/config.
# Only double-quoted literals are matched by the pattern below.
#   $1 = repo name, $2 = absolute repo path
scan_b5_hardcoded_product_id() {
  local repo="$1" repo_dir="$2"
  local product_ids='"(lysnrai|mindlyst|chronomind|jarvisjr|nomgap|peakpulse|flowmonk|notelett|actiontrail|localmemgpt|efforise|localllmlab|smartauth|productivity-web|talk2obs)"'
  local re_canonical='(shared/product\.json|product-config\.(ts|js|swift|kt)|product\.manifest\.json)'
  local re_tests='(__tests__|tests/|\.test\.|\.spec\.)'
  local re_docs='\.(md|mdx)$'
  local re_dashboards='dashboards/(admin-web|tracker-web|ux-lab)/'
  # NOTE(review): as written every alternative must be followed directly by
  # optional whitespace and a second `=`/`:` before the quote, so e.g.
  # `<SelectItem value="…">` or `productId: "…"` do not match. Kept
  # byte-for-byte; confirm the intended enumeration patterns.
  local re_enum='(SelectItem|option|productId:|product:)[[:space:]]*[=:][[:space:]]*"'
  local re_comment='^[[:space:]]*(\*|//|#)'
  local re_ts_file='\.tsx?$'
  local nl=$'\n'
  local file line evidence id_in_evidence

  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    # Allow in canonical locations.
    [[ "$file" =~ $re_canonical ]] && continue
    # Allow in test fixtures (they need literal IDs).
    [[ "$file" =~ $re_tests ]] && continue
    # Allow in docs.
    [[ "$file" =~ $re_docs ]] && continue
    # Allow cross-product UI in common_plat dashboards (they legitimately
    # enumerate all products for admin operations).
    [[ "$repo" == "learning_ai_common_plat" && "$file" =~ $re_dashboards ]] && continue
    # Allow obvious enumeration patterns. These are intentional cross-product
    # references, not hardcoded product identity.
    [[ "$evidence" =~ $re_enum ]] && continue
    # Skip JSDoc / docstring / inline comment lines containing example product IDs.
    # Pattern: line begins with whitespace then '*' (JSDoc continuation),
    # '//' (line comment), or '#' (Python comment).
    [[ "$evidence" =~ $re_comment ]] && continue

    # Skip TypeScript LITERAL TYPE constraints (line ends with ; — type def)
    # or object-literal values whose containing file has a matching literal type.
    # The simpler proxy: if the file declares 'productId: "<id>";' anywhere as a
    # type literal, treat all matching value sites as intentional (the type
    # forces the value, so the hardcode is type-system-required).
    if [[ "$file" =~ $re_ts_file ]]; then
      id_in_evidence="$(grep -oE -e "$product_ids" <<<"$evidence")"
      id_in_evidence="${id_in_evidence%%"$nl"*}"
      if [[ -n "$id_in_evidence" ]]; then
        # Look for "productId: <id>;" type literal anywhere in the same file.
        if grep -qE -e "productId:[[:space:]]*${id_in_evidence};" -- "$file" 2>/dev/null; then
          continue
        fi
      fi
    fi

    emit_finding "b5-hardcoded-product-id" "critical" "$repo" "$file" "$line" "Hardcoded product ID: ${evidence:0:80}"
  done < <(grep -rnE \
      --include='*.ts' --include='*.tsx' --include='*.js' \
      "${EXCLUDE_DIRS[@]}" \
      -e "$product_ids" -- "$repo_dir" 2>/dev/null || true)
}

# B7 — emojis in source code (per-repo AGENTS.md "Never add emojis to code").
# Severity: minor. Excludes: markdown, tests, generated files.
# Implementation: writes a small Python helper to a temp file and runs it,
# avoiding the bash heredoc-in-process-substitution pattern which produced
# 'ambiguous redirect' errors under set -u.
#   $1 = repo name, $2 = absolute repo path
# Silently does nothing when python3 is not installed or a temp file cannot
# be created.
scan_b7_emojis() {
  local repo="$1" repo_dir="$2"
  command -v python3 >/dev/null 2>&1 || return 0

  local py_helper out
  py_helper="$(mktemp -t emoji-scan.XXXXXX.py)" || return 0
  out="$(mktemp -t emoji-out.XXXXXX)" || { rm -f -- "$py_helper"; return 0; }

  cat > "$py_helper" <<'PYEOF'
import os, re, sys

root = sys.argv[1]

# Only flag DECORATIVE emojis (faces, food, animals, transport, hearts).
# Explicitly EXCLUDE U+2600-U+27BF (Miscellaneous Symbols / Dingbats) which
# contains ✓ ✗ ⚠ ★ ☐ ☑ ✅ — universally used as UI status indicators, not
# decorative.
EMOJI_RE = re.compile(
    r"[\U0001F600-\U0001F64F]"   # emoticons (faces)
    r"|[\U0001F300-\U0001F5FF]"  # misc symbols + pictographs (decorative)
    r"|[\U0001F680-\U0001F6FF]"  # transport + map
    r"|[\U0001F700-\U0001F77F]"  # alchemical symbols
    r"|[\U0001F900-\U0001F9FF]"  # supplemental symbols + pictographs
    r"|[\U0001FA70-\U0001FAFF]"  # symbols + pictographs extended-A
)
EXTS = {".ts", ".tsx", ".js", ".jsx", ".py", ".swift", ".kt", ".rs"}
SKIP_DIRS = {"node_modules", ".git", ".next", "dist", "build", "coverage",
             "__pycache__", "target", "Pods", "DerivedData", "reports",
             "test-results", ".pytest_cache", ".venv", "venv",
             "__tests__", "__mocks__", "tests", "e2e", "generated",
             "__LOCAL_LLMs", "chat-history", "__experiments", "experiments",
             "_archive_helper", ".docker-deps", ".turbo", ".ruff_cache",
             ".gradle", "playwright-report"}

# The AGENTS.md rule "Never add emojis to code unless explicitly asked"
# targets DECORATIVE emojis in code comments + log messages (e.g.,
# `// 🎉 New feature!` or `console.log("🚀 Done")`). Emojis used as
# domain UI data — notification bells, achievement icons, time-of-day
# markers — are intentional product content and SHOULD NOT be flagged.
#
# Heuristic: only flag emojis that appear in:
#   (a) Line / block comments (// ... or # ... or * ... or /* ...)
#   (b) console.log / console.warn / console.info / console.debug /
#       console.error calls (logging output, not UI text)
#   (c) Python print() calls (terminal output)
COMMENT_RE = re.compile(r"^\s*(//|#|\*)|^\s*/\*")
LOG_OR_PRINT_RE = re.compile(r"\b(console\.(log|warn|info|debug|error)|print)\s*\(")

for dp, dirs, files in os.walk(root):
    dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
    for f in files:
        ext = os.path.splitext(f)[1]
        if ext not in EXTS:
            continue
        if f.endswith((".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", ".test.js", ".spec.js")):
            continue
        fp = os.path.join(dp, f)
        try:
            with open(fp, encoding="utf-8", errors="replace") as fh:
                for i, l in enumerate(fh, 1):
                    m = EMOJI_RE.search(l)
                    if not m:
                        continue
                    # Only flag if the line is a comment OR a log/print call.
                    if COMMENT_RE.search(l) or LOG_OR_PRINT_RE.search(l):
                        print(f"{fp}:{i}:{m.group(0)}")
        except (OSError, UnicodeDecodeError):
            continue
PYEOF

  # PYTHONIOENCODING forces UTF-8 stdout: under a C/POSIX locale print() of
  # an emoji would otherwise raise UnicodeEncodeError and, with stderr
  # discarded, silently truncate the results.
  PYTHONIOENCODING=utf-8 python3 "$py_helper" "$repo_dir" > "$out" 2>/dev/null || true

  local file line evidence
  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    # Emojis in CLI / scaffolder / asset-generator output are intentional
    # terminal decoration, not product code noise. Apply the same path
    # exemptions used by console-log + python-print rules.
    [[ "$file" =~ (^|/)scripts/ ]] && continue
    [[ "$file" =~ /packages/create-app/ ]] && continue
    [[ "$file" =~ /services/monitoring/ ]] && continue
    [[ "$file" =~ /services/[^/]+/scripts/ ]] && continue
    [[ "$file" =~ (^|/)plugins/ ]] && continue
    [[ "$file" =~ /assets/ ]] && continue
    # kill_switch.py emits operator-facing CLI status output with status
    # emojis (✓/✗) — same category as scripts/ tools.
    [[ "$file" =~ /kill_switch\.py$ ]] && continue
    emit_finding "b7-emoji-in-code" "minor" "$repo" "$file" "$line" "Emoji in code: $evidence"
  done < "$out"

  rm -f -- "$py_helper" "$out"
}

# ─── Rule registry ─────────────────────────────────────────────────────────────
RULES=(
  scan_b4_console_log
  scan_b4_swift_print
  scan_b4_python_print
  scan_ts_any_type
  scan_web_hardcoded_hex
  scan_b5_hardcoded_product_id
  scan_b7_emojis
)

# ─── Scan loop ─────────────────────────────────────────────────────────────────
# Truncate the JSONL report. (Not `echo ""`, which would leave a blank first
# line — not a valid JSON document — in a one-finding-per-line file.)
: > "$JSON_OUT"
{
  echo "# Rule Violations Report — ${TODAY}"
  echo ""
  echo "> Generated by \`scripts/check-rule-violations.sh\` against canonical rules in"
  echo "> [\`AI.dev/SKILLS/agent-behavior-guidelines.md\`](../AI.dev/SKILLS/agent-behavior-guidelines.md)."
  echo ""
  echo "Severity legend: **critical** = data/security risk · **major** = rule violation · **minor** = style"
  echo ""
} > "$MD_OUT"

if [[ "$QUIET" -eq 0 ]]; then
  echo -e "${BLUE}Scanning ${#REPOS[@]} repo(s) against ${#RULES[@]} rules...${NC}"
fi

total_findings=0

# ${REPOS[@]+...} keeps an empty repo list from tripping `set -u` on the
# older bash versions (3.2 / 4.3) this script stays compatible with.
for repo in ${REPOS[@]+"${REPOS[@]}"}; do
  repo_dir="${BASE_DIR}/${repo}"
  if [[ ! -d "$repo_dir" ]]; then
    if [[ "$QUIET" -eq 0 ]]; then
      echo -e "${YELLOW}  skip: $repo (directory missing)${NC}"
    fi
    continue
  fi

  # Reset per-repo finding arrays + counters
  FINDINGS_MD=()
  FINDINGS_JSON=()
  REPO_CRITICAL=0
  REPO_MAJOR=0
  REPO_MINOR=0

  for rule_fn in "${RULES[@]}"; do
    "$rule_fn" "$repo" "$repo_dir"
  done

  # Append per-repo section to markdown report
  local_count=${#FINDINGS_MD[@]}
  total_findings=$(( total_findings + local_count ))
  c=$REPO_CRITICAL
  M=$REPO_MAJOR
  m=$REPO_MINOR
  {
    echo "## \`$repo\`"
    echo ""
    if [[ "$local_count" -eq 0 ]]; then
      echo "✅ No violations found."
    else
      echo "**Counts:** critical=$c · major=$M · minor=$m · total=$local_count"
      echo ""
      printf "%s\n" "${FINDINGS_MD[@]}"
    fi
    echo ""
  } >> "$MD_OUT"

  # Append per-repo JSON lines
  if [[ "$local_count" -gt 0 ]]; then
    printf '%s\n' "${FINDINGS_JSON[@]}" >> "$JSON_OUT"
  fi

  if [[ "$QUIET" -eq 0 ]]; then
    if [[ "$local_count" -eq 0 ]]; then
      echo -e "  ${GREEN}✓ $repo${NC} (0 findings)"
    else
      echo -e "  ${YELLOW}⚠ $repo${NC} critical=$c major=$M minor=$m total=$local_count"
    fi
  fi
done

# ─── Summary ───────────────────────────────────────────────────────────────────
# Compute per-rule totals from the JSONL output (bash 3.2-compatible).
# Extract "rule":"<id>" via sed, then sort | uniq -c. The pattern is anchored
# to the start of the line so text inside an evidence string can never be
# mistaken for the rule field.
RULE_COUNTS_FILE="$(mktemp)"
sed -nE 's/^\{"rule":"([^"]+)".*/\1/p' "$JSON_OUT" | sort | uniq -c | sort -rn > "$RULE_COUNTS_FILE"

{
  echo "## Ecosystem totals by rule"
  echo ""
  echo "| Rule | Total findings |"
  echo "|------|----------------|"
  while read -r count rule; do
    [[ -z "$rule" ]] && continue
    echo "| \`$rule\` | $count |"
  done < "$RULE_COUNTS_FILE"
  echo ""
  echo "**Grand total: $total_findings findings across ${#REPOS[@]} repos.**"
} >> "$MD_OUT"

echo ""
echo -e "${BLUE}═══ Summary ═══${NC}"
echo "  Total findings: $total_findings"
echo "  Markdown report: $MD_OUT"
echo "  JSON report:     $JSON_OUT"
echo ""
echo "By rule (highest first):"
while read -r count rule; do
  [[ -z "$rule" ]] && continue
  printf "  %-32s %d\n" "$rule" "$count"
done < "$RULE_COUNTS_FILE"
rm -f -- "$RULE_COUNTS_FILE"