Scanner refinements:
- Exclude services/<svc>/src/ (Fastify backends, not UI)
- Exclude packages/config/ (schema/defaults, not UI)
- Exclude packages/devops/ (internal tooling)
- Exclude packages/create-app/.../templates (scaffolder templates)
- Exclude *.storybook/, /stories/, *.stories.{ts,tsx} (demo/docs)
- Exclude SVG fill=, stroke= hex (brand-mandated, e.g. Google G logo)
- Exclude ThemeEditor.tsx, theme-defaults.* (their content IS hex)
- Exclude /api/themes/ routes (server-side defaults)
Source fixes in shared packages (high leverage — consumed by every product):
- packages/auth-ui/src/*Form*.tsx + OnboardingShell + MfaChallenge (7)
- packages/dashboard-shell/src/{TopBar,ProfilePage}.tsx (3)
- dashboards/tracker-web/src/app/health/page.tsx (6)
All use the canonical var(--bl-<token>, #fallback) pattern that:
- Lets product themes override (e.g., each product sets --bl-danger differently)
- Falls back to a sensible default if tokens haven't loaded yet (defensive)
common_plat hex: 59 → 0 ✓ (Tier 2 complete)
Ecosystem total: 1569 → 1402
Tier progress:
Tier 1 (critical): 13 → 0 ✓
Tier 2 (common_plat hex): 59 → 0 ✓
Tier 3 (mac_tooling, efforise): NEXT
Tier 4 (mindlyst, fastgap, flowmonk)
Tier 5 (non-hex rules)
515 lines
23 KiB
Bash
515 lines
23 KiB
Bash
#!/usr/bin/env bash
# check-rule-violations.sh — Ecosystem-wide AGENTS.md rule compliance scanner.
#
# Scans every repo in repos.txt for violations of the canonical rules defined
# in AI.dev/SKILLS/agent-behavior-guidelines.md (Parts B4, B5, B7) plus a few
# stack-specific per-repo MUST NOT rules (no `any`, no hardcoded hex colors).
#
# Output:
#   reports/rule-violations-YYYY-MM-DD.md   — human-readable, grouped by repo
#   reports/rule-violations-YYYY-MM-DD.json — machine-readable, one finding per line
#
# Usage:
#   bash scripts/check-rule-violations.sh               # scan all repos
#   bash scripts/check-rule-violations.sh <repo-name>   # scan single repo (resolved under the workspace root)
#   bash scripts/check-rule-violations.sh --quiet       # only print summary
#
# Exit code: 0 regardless of findings (scanner is informational, not gating); 1 on an unknown flag.
#
# Each rule is implemented as a function:
#   scan_<rule_id>() { repo="$1"; repo_dir="$2"; ...emit findings via emit_finding... }
# Adding a new rule:
#   1. Add a function below
#   2. Append to RULES=() array
#   3. Document severity + rationale in the function header comment
#
# Findings are emitted via emit_finding which appends to two global arrays
# (FINDINGS_JSON and FINDINGS_MD) that are flushed to the report files after each repo.

set -uo pipefail
# NOT -e: grep returning 1 (no matches) is normal and must not abort the scan.

# All locations are derived from this script's own directory, so the scanner
# behaves the same regardless of the caller's working directory.
#   REPOS_TXT   — list of repos to scan (one per line)
#   BASE_DIR    — two levels above the script directory; repos are resolved under it
#   REPORTS_DIR — where the dated markdown / JSON reports are written
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPOS_TXT="${SCRIPT_DIR}/../.windsurf/workflows/repos.txt"
BASE_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
REPORTS_DIR="${SCRIPT_DIR}/../reports"
TODAY="$(date +%Y-%m-%d)"
JSON_OUT="${REPORTS_DIR}/rule-violations-${TODAY}.json"
MD_OUT="${REPORTS_DIR}/rule-violations-${TODAY}.md"

# ANSI colour escapes; they are stored as literal backslash sequences and only
# become colours when printed with `echo -e`.
RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'; BLUE='\033[0;34m'; NC='\033[0m'

# Without the reports directory every later redirect would fail, so stop here
# with a clear message instead of scanning and then losing all output.
if ! mkdir -p "$REPORTS_DIR"; then
  echo "Cannot create reports directory: $REPORTS_DIR" >&2
  exit 1
fi

# ─── Mode parsing ──────────────────────────────────────────────────────────────

QUIET=0
SINGLE_REPO=""

# print_usage — print the comment header at the top of this script.
# Emits every `#` line after the shebang and stops at the first non-comment
# line, so the help text cannot drift into code when the header changes length.
print_usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
}

# parse_args ARG...
#   --quiet | -q   set QUIET=1 (only the summary is printed)
#   --help  | -h   print the header comment and exit 0
#   -<other>       report the unknown flag on stderr and exit 1
#   <anything>     taken as the single repo to scan (last one wins)
# Globals written: QUIET, SINGLE_REPO.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --quiet|-q) QUIET=1 ;;
      --help|-h) print_usage; exit 0 ;;
      -*) echo "Unknown flag: $arg" >&2; exit 1 ;;
      *) SINGLE_REPO="$arg" ;;
    esac
  done
}

parse_args "$@"

# ─── Repo list ─────────────────────────────────────────────────────────────────

# REPOS holds the repo names to scan: either the single repo given on the
# command line, or every non-blank, non-comment line of REPOS_TXT.
REPOS=()
if [[ -n "$SINGLE_REPO" ]]; then
  REPOS+=("$SINGLE_REPO")
elif [[ ! -r "$REPOS_TXT" ]]; then
  # Fail with a clear message rather than a bare redirect error followed by a
  # scan of zero repos.
  echo "Repo list not found or unreadable: $REPOS_TXT" >&2
  exit 1
else
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Trim leading and trailing whitespace (this also drops a CR from CRLF files).
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    [[ -z "$line" ]] && continue
    [[ "$line" == \#* ]] && continue
    REPOS+=("$line")
  done < "$REPOS_TXT"
fi

# ─── Finding accumulator ───────────────────────────────────────────────────────

# Globals updated by emit_finding(); flushed at end of each repo.
declare -a FINDINGS_JSON
declare -a FINDINGS_MD

# Per-repo counters (reset before each repo). Bash 3.2 compatible — no
# associative arrays. Ecosystem totals are computed at the end by grep'ing
# the JSONL output file (each finding is one line).
REPO_CRITICAL=0
REPO_MAJOR=0
REPO_MINOR=0

# emit_finding RULE_ID SEVERITY REPO FILE LINE EVIDENCE
#   SEVERITY: critical|major|minor
#
# Appends one JSON object to FINDINGS_JSON and one markdown bullet to
# FINDINGS_MD, and bumps the REPO_* counter matching SEVERITY (any other
# severity is recorded but not counted).
#   FILE     — path as reported by the scanner; the "${BASE_DIR}/${REPO}/"
#              prefix is stripped for shorter reporting
#   LINE     — line number; emitted as a bare JSON number (0 if not numeric)
#   EVIDENCE — free text; JSON-escaped for the JSON record, verbatim in markdown
emit_finding() {
  local rule_id="$1" severity="$2" repo="$3" file="$4" line="$5" evidence="$6"
  local tab=$'\t' cr=$'\r' nl=$'\n'

  # The prefix is quoted so glob characters in the path are matched literally.
  local rel_file="${file#"${BASE_DIR}/${repo}/"}"

  # JSON escaping: backslash first (so later escapes are not doubled), then
  # quotes, then the control characters that can appear in a grep'd line.
  local json_evidence="${evidence//\\/\\\\}"
  json_evidence="${json_evidence//\"/\\\"}"
  json_evidence="${json_evidence//$tab/\\t}"
  json_evidence="${json_evidence//$cr/\\r}"
  json_evidence="${json_evidence//$nl/\\n}"

  local json_file="${rel_file//\\/\\\\}"
  json_file="${json_file//\"/\\\"}"

  # "line" is written unquoted; anything non-numeric would yield invalid JSON.
  local json_line="$line"
  [[ "$json_line" =~ ^[0-9]+$ ]] || json_line=0

  FINDINGS_JSON+=("{\"rule\":\"$rule_id\",\"severity\":\"$severity\",\"repo\":\"$repo\",\"file\":\"$json_file\",\"line\":$json_line,\"evidence\":\"$json_evidence\"}")
  FINDINGS_MD+=("- **[$severity]** \`$rel_file:$line\` — $evidence")

  case "$severity" in
    critical) REPO_CRITICAL=$(( REPO_CRITICAL + 1 )) ;;
    major)    REPO_MAJOR=$(( REPO_MAJOR + 1 )) ;;
    minor)    REPO_MINOR=$(( REPO_MINOR + 1 )) ;;
  esac
}

# ─── Shared exclusion globs (grep --exclude-dir / --exclude) ───────────────────
#
# These directories are always excluded from scans. They contain either
# generated, vendored, or test code (tests have a separate exclusion policy
# per rule — tests are sacred but they don't violate prod-code rules).

EXCLUDE_DIRS=(
  --exclude-dir=node_modules
  --exclude-dir=.git
  --exclude-dir=.next
  --exclude-dir=.turbo
  --exclude-dir=dist
  --exclude-dir=build
  --exclude-dir=coverage
  --exclude-dir=__pycache__
  --exclude-dir=.venv
  --exclude-dir=venv
  --exclude-dir=target            # Rust
  --exclude-dir=Pods              # CocoaPods
  --exclude-dir=DerivedData       # Xcode
  --exclude-dir=.gradle
  --exclude-dir=reports
  --exclude-dir=test-results
  --exclude-dir=playwright-report
  --exclude-dir=.pytest_cache
  --exclude-dir=.ruff_cache
  --exclude-dir=generated         # design-tokens output
  --exclude-dir=__LOCAL_LLMs      # local LLM sandbox (not production)
  --exclude-dir=chat-history      # IDE chat archives
  --exclude-dir=__experiments     # experimental code
  --exclude-dir=experiments       # experimental code
  --exclude-dir=_archive_helper   # archived code
  --exclude-dir=.docker-deps      # bundled npm tarballs for Docker builds
)

# Test-file exclusions (used per-rule; some rules apply to tests, others don't).
# The file patterns are quoted so the shell never expands them as pathname
# globs; grep receives the literal pattern. The .jsx variants are listed
# because the console.log scan covers *.jsx files.
TEST_EXCLUDES=(
  --exclude-dir=__tests__
  --exclude-dir=__mocks__
  --exclude-dir=e2e
  --exclude-dir=tests
  '--exclude=*.test.ts'
  '--exclude=*.test.tsx'
  '--exclude=*.spec.ts'
  '--exclude=*.spec.tsx'
  '--exclude=*.test.js'
  '--exclude=*.spec.js'
  '--exclude=*.test.jsx'
  '--exclude=*.spec.jsx'
)

# ─── RULES ─────────────────────────────────────────────────────────────────────
#
# Each scan_* function:
#   • Receives $1 = repo name, $2 = absolute repo path
#   • Emits findings via emit_finding
#   • Must not error out (use || true after grep when needed)

# B4 — no `console.log` in production TS/JS code.
# Severity: major. Tests excluded. Allowed: console.warn/error for legit errors
# in some places, but we still flag console.log to surface.
#
# Arguments: $1 = repo name, $2 = absolute repo path.
# Skipped: lines that start as a comment (//, #, *, /*) and any file under a
# scripts/ directory (CLI/admin scripts — their job is to print to terminal).
scan_b4_console_log() {
  local repo="$1" repo_dir="$2"
  local file line evidence scoped
  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    # Skip commented lines (comment marker before console.log).
    [[ "$evidence" =~ ^[[:space:]]*(//|#|\*|/\*) ]] && continue
    # Match scripts/ only from the repo name downwards, so a workspace that
    # itself lives under some ".../scripts/..." directory is still scanned.
    scoped="/${repo}/${file#"${repo_dir}/"}"
    [[ "$scoped" =~ /scripts/ ]] && continue
    emit_finding "b4-console-log" "major" "$repo" "$file" "$line" "console.log: ${evidence:0:80}"
  done < <(grep -rnE 'console\.log\(' "$repo_dir" \
      --include='*.ts' --include='*.tsx' --include='*.js' --include='*.jsx' --include='*.mjs' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" 2>/dev/null || true)
}

# B4 — no `print(` in Swift production code.
# Severity: major. XCTest test files excluded (Tests/ directories and files
# named *Tests.swift / *Test.swift).
#
# Arguments: $1 = repo name, $2 = absolute repo path.
# Skipped: lines that start as a comment (//, *, /*), matching the comment
# handling of the console.log scan.
scan_b4_swift_print() {
  local repo="$1" repo_dir="$2"
  local file line evidence
  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    [[ "$evidence" =~ ^[[:space:]]*(//|\*|/\*) ]] && continue
    emit_finding "b4-swift-print" "major" "$repo" "$file" "$line" "Swift print(): ${evidence:0:80}"
  # "(^|[^[:alnum:]_])" is the POSIX spelling of a leading word boundary, so
  # identifiers that merely end in "print" (e.g. sprint) do not match.
  done < <(grep -rnE '(^|[^[:alnum:]_])print\(' "$repo_dir" --include='*.swift' \
      --exclude-dir=Tests --exclude='*Tests.swift' --exclude='*Test.swift' \
      "${EXCLUDE_DIRS[@]}" 2>/dev/null || true)
}

# B4 — no `print(` in Python production code under src/, tools/,
# backend-python/ and backend/src/.
# Severity: major. Scripts dir and tests excluded (scripts and CLIs may print).
#
# Arguments: $1 = repo name, $2 = absolute repo path.
scan_b4_python_print() {
  local repo="$1" repo_dir="$2"
  local src file line evidence
  # Note: scripts/ is never visited (CLIs print to terminal). Only scan
  # core src/library/backend trees where structlog/logging should be used.
  for src in src tools backend-python backend/src; do
    [[ -d "${repo_dir}/${src}" ]] || continue
    while IFS=: read -r file line evidence; do
      [[ -z "$file" ]] && continue
      # Skip CLI entrypoint files (often named cli.py, __main__.py).
      [[ "$file" =~ /(cli|__main__|main)\.py$ ]] && continue
      emit_finding "b4-python-print" "major" "$repo" "$file" "$line" "Python print(): ${evidence:0:80}"
    # The pattern is anchored to the start of the line, so commented-out
    # prints ("# print(...)") never match and need no separate filter.
    done < <(grep -rnE '^[[:space:]]*print\(' "${repo_dir}/${src}" --include='*.py' \
        --exclude-dir=tests --exclude='test_*.py' --exclude='*_test.py' \
        "${EXCLUDE_DIRS[@]}" 2>/dev/null || true)
  done
}

# B4 — TypeScript `any` type usage in source code.
# Severity: minor (per AGENTS.md it's a MUST NOT for most TS repos but is
# pervasive in some legacy code; flagging as minor avoids drowning the report).
# Excludes: tests, .d.ts files, generated code, lines carrying an
# `eslint-disable` annotation, and lines that start as a comment (prose such
# as "// Note: any value works" would otherwise match ": any").
#
# Arguments: $1 = repo name, $2 = absolute repo path.
scan_ts_any_type() {
  local repo="$1" repo_dir="$2"
  local file line evidence
  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    [[ "$evidence" =~ eslint-disable ]] && continue
    [[ "$evidence" =~ ^[[:space:]]*(//|\*|/\*) ]] && continue
    # `as any` narrowing for JSON parsing is reported like any other use; a
    # follow-up rule can split it out if needed.
    emit_finding "ts-any-type" "minor" "$repo" "$file" "$line" "any type: ${evidence:0:80}"
  # Two alternatives: ": any" annotations and "as any" casts, each with POSIX
  # word-boundary equivalents so e.g. ": anything" is not matched.
  done < <(grep -rnE ':[[:space:]]*any([^[:alnum:]_]|$)|(^|[^[:alnum:]_])as[[:space:]]+any([^[:alnum:]_]|$)' "$repo_dir" \
      --include='*.ts' --include='*.tsx' \
      --exclude='*.d.ts' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" 2>/dev/null || true)
}

# _web_hex_path_exempt SCOPED_PATH
# Returns 0 when hex colors in the given file are legitimate and must not be
# reported, 1 otherwise. SCOPED_PATH is "/<repo>/<path inside repo>".
_web_hex_path_exempt() {
  local p="$1"
  # Design token DEFINITION files: the canonical places where colors are
  # declared as CSS custom properties or design system tokens.
  [[ "$p" =~ (^|/)(globals\.css|tokens\.css|tailwind\.config\.(ts|js|cjs|mjs)|.*\.tokens\..*|.*Theme\.(ts|tsx|swift|kt))$ ]] && return 0
  [[ "$p" =~ /(generated|design-tokens|design-system)/ ]] && return 0
  # Backend code is not a UI styling layer. Hex values in backend modules are
  # data (e.g., theme presets, zone colors stored in Cosmos) — not styling
  # rule violations.
  [[ "$p" =~ (^|/)backend/ ]] && return 0
  # Same for platform/extraction/mcp services (Fastify backends in common_plat).
  [[ "$p" =~ /services/[^/]+/src/ ]] && return 0
  # Config / schema packages declare default theme values for products to
  # consume. Hex values there are schema defaults, not UI styling.
  [[ "$p" =~ /packages/config/ ]] && return 0
  # Storybook stories and previews are documentation/demo, not production UI.
  [[ "$p" =~ (\.storybook/|/stories/|\.stories\.(ts|tsx)$) ]] && return 0
  # Scaffolding templates contain example colors that get substituted at gen time.
  [[ "$p" =~ /packages/create-app/src/lib/templates ]] && return 0
  # devops package (internal dev tooling, not a product UI).
  [[ "$p" =~ /packages/devops/ ]] && return 0
  # Tool pages where hex is the demo content being manipulated, not styling.
  [[ "$p" =~ /tools/(color-picker|markdown-preview|qr-code|image-to-base64|regex-tester)/ ]] && return 0
  # Theme editor / theme-defaults files — their content IS hex values being
  # manipulated, not styling.
  [[ "$p" =~ /(ThemeEditor|theme-defaults)\.(ts|tsx) ]] && return 0
  # Theme API routes (server-side defaults).
  [[ "$p" =~ /api/themes/ ]] && return 0
  return 1
}

# Web-specific: hardcoded hex colors in TS/TSX/CSS/SCSS source code.
# Per-repo AGENTS.md MUST NOT — colors must come from design-tokens
# (--XX-* CSS custom properties).
# Severity: major. Excludes: design-tokens output, tests, comments.
# Evidence: the first matched hex code on the line (e.g., "#fff").
#
# Detects the four CSS hex notations: #RGB, #RGBA, #RRGGBB and #RRGGBBAA.
# Arguments: $1 = repo name, $2 = absolute repo path.
scan_web_hardcoded_hex() {
  local repo="$1" repo_dir="$2"
  local file line content scoped match
  local prev_file="" exempt=0

  # One hex token followed by a non-word character or end of line (the POSIX
  # equivalent of a trailing word boundary). Shared by grep and by the
  # in-shell extraction so both always agree.
  local re_hex='#([0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{4}|[0-9a-fA-F]{3})([^[:alnum:]_]|$)'
  # Line-content exemptions (kept in variables to avoid quoting pitfalls).
  local re_token_def='^[[:space:]]*--[a-zA-Z0-9-]+:[[:space:]]*#'
  local re_var='var\(--[a-zA-Z0-9_-]+'
  local re_comment='^[[:space:]]*(//|\*|/\*)'
  local re_theme_color='themeColor[[:space:]]*:'
  local re_svg='(fill|stroke)="#[0-9a-fA-F]+"'
  local re_entity='&#[0-9]+;'

  # Strategy: grep returns the full line (so context can be examined), then
  # the path and content filters are applied and the hex match is extracted.
  while IFS=: read -r file line content; do
    [[ -z "$file" ]] && continue

    # grep emits matches grouped by file, so evaluate the path filters once
    # per file. Paths are matched from the repo name downwards so directories
    # above the workspace (e.g. a checkout under ".../backend/") cannot
    # exempt an entire scan.
    if [[ "$file" != "$prev_file" ]]; then
      prev_file="$file"
      scoped="/${repo}/${file#"${repo_dir}/"}"
      if _web_hex_path_exempt "$scoped"; then exempt=1; else exempt=0; fi
    fi
    [[ "$exempt" -eq 1 ]] && continue

    # Skip CSS custom property DEFINITIONS (lines like "  --bl-accent: #5A8CFF").
    [[ "$content" =~ $re_token_def ]] && continue
    # Skip lines using the var(--token, #fallback) pattern — these are
    # defensive fallbacks for the design-token loading order, not raw hardcodes.
    [[ "$content" =~ $re_var ]] && continue
    # Skip comment lines (// or /* or *).
    [[ "$content" =~ $re_comment ]] && continue
    # Skip Next.js PWA themeColor metadata (must be literal hex per spec).
    [[ "$content" =~ $re_theme_color ]] && continue
    # Skip SVG fill/stroke attributes with hex values — these are typically
    # brand-mandated colors (Google "G" logo, Microsoft, Apple) that the
    # respective brand guidelines REQUIRE be literal hex, not themed.
    [[ "$content" =~ $re_svg ]] && continue
    # Skip HTML numeric character references like &#128196; — these encode
    # Unicode characters, NOT hex colors (decimal digits are a subset of hex
    # digits, which fools the hex pattern).
    [[ "$content" =~ $re_entity ]] && continue

    # Extract just the first hex match for evidence.
    [[ "$content" =~ $re_hex ]] || continue
    match="#${BASH_REMATCH[1]}"
    emit_finding "web-hardcoded-hex" "major" "$repo" "$file" "$line" "Hardcoded hex color: $match"
  done < <(grep -rnE "$re_hex" "$repo_dir" \
      --include='*.ts' --include='*.tsx' --include='*.css' --include='*.scss' \
      "${EXCLUDE_DIRS[@]}" "${TEST_EXCLUDES[@]}" 2>/dev/null || true)
}

# B5 — hardcoded product ID string literals outside shared/product.json
# and product-config files.
# Severity: critical. The canonical pattern is PRODUCT_ID from product-config.ts
# or @bytelyst/config.
#
# Arguments: $1 = repo name, $2 = absolute repo path.
# Only double-quoted IDs in *.ts / *.tsx / *.js files are considered.
scan_b5_hardcoded_product_id() {
  local repo="$1" repo_dir="$2"
  local file line evidence scoped quoted_id
  # Single source of truth for the known product IDs (quotes included); used
  # both for the grep and for pulling the ID out of a matched line.
  local product_ids='"(lysnrai|mindlyst|chronomind|jarvisjr|nomgap|peakpulse|flowmonk|notelett|actiontrail|localmemgpt|efforise|localllmlab|smartauth|productivity-web|talk2obs)"'
  local re_enum='(SelectItem|option|productId:|product:)[[:space:]]*[=:][[:space:]]*"'
  # JSX/HTML enumeration: <SelectItem ... value="id"> / <option ... value="id">.
  local re_enum_value='<(SelectItem|option)[^>]*[[:space:]]value[[:space:]]*=[[:space:]]*[{]?"'

  while IFS=: read -r file line evidence; do
    [[ -z "$file" ]] && continue
    # Path filters look at the path from the repo name downwards, so
    # directories above the workspace cannot influence the result.
    scoped="/${repo}/${file#"${repo_dir}/"}"

    # Allow in canonical locations
    [[ "$scoped" =~ (shared/product\.json|product-config\.(ts|js|swift|kt)|product\.manifest\.json) ]] && continue
    # Allow in test fixtures (they need literal IDs)
    [[ "$scoped" =~ (__tests__|tests/|\.test\.|\.spec\.) ]] && continue
    # Allow in docs
    [[ "$scoped" =~ \.(md|mdx)$ ]] && continue
    # Allow cross-product UI in common_plat dashboards (they legitimately
    # enumerate all products for admin operations).
    [[ "$repo" == "learning_ai_common_plat" && "$scoped" =~ dashboards/(admin-web|tracker-web|ux-lab)/ ]] && continue
    # Allow obvious enumeration patterns: SelectItem value=, option value=,
    # product list arrays, etc. These are intentional cross-product references,
    # not hardcoded product identity.
    [[ "$evidence" =~ $re_enum ]] && continue
    [[ "$evidence" =~ $re_enum_value ]] && continue
    # Skip JSDoc / docstring / inline comment lines containing example product IDs.
    # Pattern: line begins with whitespace then '*' (JSDoc continuation),
    # '//' (line comment), or '#' (Python comment).
    [[ "$evidence" =~ ^[[:space:]]*(\*|//|#) ]] && continue

    # TypeScript literal-type proxy: if the same file declares
    # 'productId: "<id>";' anywhere (a type literal ends with ';'), treat all
    # matching value sites in that file as intentional — the type forces the
    # value, so the hardcode is type-system-required.
    if [[ "$file" =~ \.tsx?$ && "$evidence" =~ $product_ids ]]; then
      quoted_id="${BASH_REMATCH[0]}"
      if grep -qE "productId:[[:space:]]*${quoted_id};" "$file" 2>/dev/null; then
        continue
      fi
    fi

    emit_finding "b5-hardcoded-product-id" "critical" "$repo" "$file" "$line" "Hardcoded product ID: ${evidence:0:80}"
  done < <(grep -rnE "$product_ids" "$repo_dir" \
      --include='*.ts' --include='*.tsx' --include='*.js' \
      "${EXCLUDE_DIRS[@]}" 2>/dev/null || true)
}

# B7 — emojis in source code (per-repo AGENTS.md "Never add emojis to code").
|
|
# Severity: minor. Excludes: markdown, tests, generated files.
|
|
# Implementation: writes a small Python helper to a temp file and runs it,
|
|
# avoiding the bash heredoc-in-process-substitution pattern which produced
|
|
# 'ambiguous redirect' errors under set -u.
|
|
scan_b7_emojis() {
|
|
local repo="$1" repo_dir="$2"
|
|
command -v python3 >/dev/null 2>&1 || return 0
|
|
local py_helper out
|
|
py_helper="$(mktemp -t emoji-scan.XXXXXX.py)"
|
|
out="$(mktemp -t emoji-out.XXXXXX)"
|
|
cat > "$py_helper" <<'PYEOF'
|
|
import os, re, sys
|
|
root = sys.argv[1]
|
|
# Only flag DECORATIVE emojis (faces, food, animals, transport, hearts).
|
|
# Explicitly EXCLUDE U+2600-U+27BF (Miscellaneous Symbols) which contains
|
|
# ✓ ✗ ⚠ ★ ☐ ☑ ✓ — universally used as UI status indicators, not decorative.
|
|
EMOJI_RE = re.compile(
|
|
r"[\U0001F600-\U0001F64F]" # emoticons (faces)
|
|
r"|[\U0001F300-\U0001F5FF]" # misc symbols + pictographs (decorative)
|
|
r"|[\U0001F680-\U0001F6FF]" # transport + map
|
|
r"|[\U0001F700-\U0001F77F]" # alchemical symbols
|
|
r"|[\U0001F900-\U0001F9FF]" # supplemental symbols + pictographs
|
|
r"|[\U0001FA70-\U0001FAFF]" # symbols + pictographs extended-A
|
|
)
|
|
EXTS = {".ts", ".tsx", ".js", ".jsx", ".py", ".swift", ".kt", ".rs"}
|
|
SKIP_DIRS = {"node_modules", ".git", ".next", "dist", "build", "coverage",
|
|
"__pycache__", "target", "Pods", "DerivedData", "reports",
|
|
"test-results", ".pytest_cache", ".venv", "venv",
|
|
"__tests__", "__mocks__", "tests", "e2e", "generated",
|
|
"__LOCAL_LLMs", "chat-history", "__experiments",
|
|
"experiments", "_archive_helper", ".docker-deps",
|
|
".turbo", ".ruff_cache", ".gradle", "playwright-report"}
|
|
for dp, dirs, files in os.walk(root):
|
|
dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
|
|
for f in files:
|
|
ext = os.path.splitext(f)[1]
|
|
if ext not in EXTS:
|
|
continue
|
|
if f.endswith((".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx",
|
|
".test.js", ".spec.js")):
|
|
continue
|
|
fp = os.path.join(dp, f)
|
|
try:
|
|
with open(fp, encoding="utf-8", errors="replace") as fh:
|
|
for i, l in enumerate(fh, 1):
|
|
m = EMOJI_RE.search(l)
|
|
if m:
|
|
# Emit: filepath:line:emoji_char
|
|
print(f"{fp}:{i}:{m.group(0)}")
|
|
except (OSError, UnicodeDecodeError):
|
|
continue
|
|
PYEOF
|
|
python3 "$py_helper" "$repo_dir" > "$out" 2>/dev/null || true
|
|
while IFS=: read -r file line evidence; do
|
|
[[ -z "$file" ]] && continue
|
|
emit_finding "b7-emoji-in-code" "minor" "$repo" "$file" "$line" "Emoji in code: $evidence"
|
|
done < "$out"
|
|
rm -f "$py_helper" "$out"
|
|
}
|
|
|
|
# ─── Rule registry ─────────────────────────────────────────────────────────────

# Names of the scan_* functions to run against every repo, in this order.
# Each is invoked as: <fn> <repo-name> <absolute-repo-path>.
RULES=(
  scan_b4_console_log
  scan_b4_swift_print
  scan_b4_python_print
  scan_ts_any_type
  scan_web_hardcoded_hex
  scan_b5_hardcoded_product_id
  scan_b7_emojis
)

# ─── Scan loop ─────────────────────────────────────────────────────────────────

# Start both reports from scratch. The JSON file is truncated (rather than
# written with `echo ""`) so it holds exactly one finding per line with no
# leading blank line.
: > "$JSON_OUT"
{
  echo "# Rule Violations Report — ${TODAY}"
  echo ""
  echo "> Generated by \`scripts/check-rule-violations.sh\` against canonical rules in"
  echo "> [\`AI.dev/SKILLS/agent-behavior-guidelines.md\`](../AI.dev/SKILLS/agent-behavior-guidelines.md)."
  echo ""
  echo "Severity legend: **critical** = data/security risk · **major** = rule violation · **minor** = style"
  echo ""
} > "$MD_OUT"

if [[ "$QUIET" -eq 0 ]]; then
  echo -e "${BLUE}Scanning ${#REPOS[@]} repo(s) against ${#RULES[@]} rules...${NC}"
fi

total_findings=0
# ${REPOS[@]+...} expands to nothing for an empty array, which keeps
# `set -u` from aborting on bash versions older than 4.4.
for repo in ${REPOS[@]+"${REPOS[@]}"}; do
  repo_dir="${BASE_DIR}/${repo}"
  if [[ ! -d "$repo_dir" ]]; then
    [[ "$QUIET" -eq 0 ]] && echo -e "${YELLOW} skip: $repo (directory missing)${NC}"
    continue
  fi

  # Reset per-repo finding arrays + counters
  FINDINGS_MD=()
  FINDINGS_JSON=()
  REPO_CRITICAL=0
  REPO_MAJOR=0
  REPO_MINOR=0

  for rule_fn in "${RULES[@]}"; do
    "$rule_fn" "$repo" "$repo_dir"
  done

  # Snapshot this repo's totals; every emitted finding adds one markdown line.
  local_count=${#FINDINGS_MD[@]}
  total_findings=$(( total_findings + local_count ))
  c=$REPO_CRITICAL
  M=$REPO_MAJOR
  m=$REPO_MINOR

  # Append per-repo section to markdown report
  {
    echo "## \`$repo\`"
    echo ""
    if [[ "$local_count" -eq 0 ]]; then
      echo "✅ No violations found."
    else
      echo "**Counts:** critical=$c · major=$M · minor=$m · total=$local_count"
      echo ""
      printf "%s\n" "${FINDINGS_MD[@]}"
    fi
    echo ""
  } >> "$MD_OUT"

  # Append per-repo JSON lines
  if [[ "$local_count" -gt 0 ]]; then
    printf '%s\n' "${FINDINGS_JSON[@]}" >> "$JSON_OUT"
  fi

  if [[ "$QUIET" -eq 0 ]]; then
    if [[ "$local_count" -eq 0 ]]; then
      echo -e " ${GREEN}✓ $repo${NC} (0 findings)"
    else
      echo -e " ${YELLOW}⚠ $repo${NC} critical=$c major=$M minor=$m total=$local_count"
    fi
  fi
done

# ─── Summary ───────────────────────────────────────────────────────────────────

# Compute per-rule totals from the JSONL output (bash 3.2-compatible).
# Extract the leading "rule":"<id>" of each record via sed, then
# sort | uniq -c | sort -rn. The result is kept in a variable, so there is no
# temp file to create or clean up. The pattern is anchored to the start of the
# record so text inside a file path or evidence can never be mistaken for the
# rule id.
rule_counts="$(sed -nE 's/^[{]"rule":"([^"]+)".*/\1/p' "$JSON_OUT" | sort | uniq -c | sort -rn)"

{
  echo "## Ecosystem totals by rule"
  echo ""
  echo "| Rule | Total findings |"
  echo "|------|----------------|"
  while read -r count rule; do
    [[ -z "$rule" ]] && continue
    echo "| \`$rule\` | $count |"
  done <<< "$rule_counts"
  echo ""
  echo "**Grand total: $total_findings findings across ${#REPOS[@]} repos.**"
} >> "$MD_OUT"

echo ""
echo -e "${BLUE}═══ Summary ═══${NC}"
echo " Total findings: $total_findings"
echo " Markdown report: $MD_OUT"
echo " JSON report: $JSON_OUT"
echo ""
echo "By rule (highest first):"
while read -r count rule; do
  [[ -z "$rule" ]] && continue
  printf " %-32s %d\n" "$rule" "$count"
done <<< "$rule_counts"

# The scanner is informational, not gating: findings never change the exit code.
exit 0