diff --git a/docs/vm-security-blind-spots-roadmap.md b/docs/vm-security-blind-spots-roadmap.md index 60222b3..24a9d8d 100644 --- a/docs/vm-security-blind-spots-roadmap.md +++ b/docs/vm-security-blind-spots-roadmap.md @@ -357,7 +357,7 @@ Effective `sshd -T` settings showed: - [ ] Inventory root/user crontabs, `/etc/cron.d`, systemd timers, Hermes cron, and Gitea Actions schedules. - [x] Remove or update stale `/opt/bytelyst/bytelyst-devops-tools/...` references after confirming replacements. - [ ] Add owner, purpose, expected output, and alert channel for every job. -- [ ] Add a stale-job detector for missing script paths and failed systemd units. +- [x] Add a stale-job detector for missing script paths and failed systemd units. **Acceptance criteria:** @@ -400,7 +400,7 @@ Effective `sshd -T` settings showed: - [ ] Fix/retire unhealthy containers. - [x] Resolve `hermes-root-backup.service` failed state. - [x] Decide and document Gitea runner active/disabled state. -- [ ] Add missing-script checks. Stale root cron path was fixed on 2026-05-27. +- [x] Add missing-script checks. Stale root cron path was fixed on 2026-05-27. - [ ] Apply pending security/runtime updates in a maintenance window. **Exit criteria:** no unexpected failed units, no ignored unhealthy required containers, no stale cron paths, and runner state is intentional. @@ -497,6 +497,24 @@ Minimum post-checks for Phase 1: - Record a small dedicated smoke workflow that does not need production secrets, so runner health is proven by a controlled workflow rather than incidental queued work. - Add runner health to VM observability so enabled-but-inactive drift is caught automatically. +### 2026-05-27 — Phase 2 stale automation detector + +**Changed:** + +- Extended `scripts/VMs/HostingerVM/vm-health-check.sh` with an `AUTOMATION DRIFT` section. +- The daily health check now reports failed systemd units and root crontab script paths that no longer exist. 
# Append one UTC-timestamped line to the optional health-check log.
#
# Logging is strictly best-effort: when the log cannot be written (restricted
# or read-only context, missing directory, target is a directory, ...) the
# function stays silent and still succeeds, so a health check never fails or
# emits noise because of its own log file.
#
# Globals:   LOG_FILE (read) - path of the log file to append to
# Arguments: message words; joined with single spaces via "$*"
# Outputs:   nothing on stdout/stderr; appends "[<UTC ISO-8601>] <message>"
# Returns:   always 0
log_to_file() {
    # No destination configured: nothing to do.
    [[ -n "${LOG_FILE:-}" ]] || return 0

    local log_dir
    log_dir="$(dirname -- "$LOG_FILE")"

    # Cheap pre-check so the common restricted case does not even attempt the
    # write: an existing file must be writable, a new file needs a writable
    # parent directory.
    if [[ -e "$LOG_FILE" ]]; then
        [[ -w "$LOG_FILE" ]] || return 0
    else
        [[ -w "$log_dir" ]] || return 0
    fi

    # Redirections are applied left to right, so stderr must be silenced
    # BEFORE the log file is opened; otherwise the shell reports a failed
    # open (e.g. "Is a directory") on the still-visible stderr.
    printf '[%s] %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*" 2>/dev/null >> "$LOG_FILE" || true
}

# Report automation drift: failed systemd units and crontab entries that
# reference script paths which no longer exist.
#
# Only the crontab returned by `crontab -l` for the invoking user is scanned.
# A path is checked when it starts with /opt, /root, /home, /usr/local or /etc
# AND looks like a script (ends in .sh / .py, or contains /scripts/).
#
# Globals:   none read directly
# Arguments: none
# Outputs:   via header() and record(); emits the keys `failed_units` and
#            `cron_missing_paths`, each with status OK or WARN
# Returns:   status of the final record() call
check_automation_drift() {
    header "AUTOMATION DRIFT"

    # Comma-separated names of failed units; empty when there are none or
    # when systemctl is unavailable (errors are deliberately ignored).
    local failed_units
    failed_units=$(systemctl --failed --no-legend --plain 2>/dev/null | awk 'NF {print $1}' | paste -sd, - || true)
    if [[ -n "$failed_units" ]]; then
        record failed_units WARN "$failed_units" "Failed systemd units: $failed_units"
    else
        record failed_units OK "0 failed" "No failed systemd units"
    fi

    # A candidate must begin at a token boundary (line start, whitespace, a
    # quote, '=' or '('). Without that anchor "/var/opt/x.sh" would be
    # reported as "/opt/x.sh", and redirect targets (">>/opt/...") would be
    # mistaken for scripts.
    local -r path_re="(^|[[:space:]\"'=(])/(opt|root|home|usr/local|etc)/[^[:space:]|;&<>]+"

    local -a missing_paths=()
    local -A seen_paths=()
    local cron_line path clean_path joined

    while IFS= read -r cron_line || [[ -n "$cron_line" ]]; do
        # Skip blank lines, comments and environment assignments (MAILTO=...).
        [[ -z "$cron_line" || "$cron_line" =~ ^[[:space:]]*# ]] && continue
        [[ "$cron_line" =~ ^[A-Za-z_][A-Za-z0-9_]*= ]] && continue

        while IFS= read -r path; do
            # Drop the boundary character captured in front of the path.
            [[ "$path" == /* ]] || path="${path:1}"

            # Strip every trailing quote/punctuation character, in any
            # order and any number (handles `x.sh")` as well as `x.sh'`).
            clean_path="$path"
            while :; do
                case "$clean_path" in
                    *\" | *\' | *, | *\; | *\)) clean_path="${clean_path%?}" ;;
                    *) break ;;
                esac
            done

            # Only script-like paths are expected to exist ahead of time.
            if [[ "$clean_path" == *.sh || "$clean_path" == *.py || "$clean_path" == */scripts/* ]]; then
                # Report each missing path once, in order of first appearance.
                [[ -n "${seen_paths[$clean_path]+x}" ]] && continue
                seen_paths[$clean_path]=1
                [[ -e "$clean_path" ]] || missing_paths+=("$clean_path")
            fi
        done < <(grep -oE -- "$path_re" <<< "$cron_line" || true)
    done < <(crontab -l 2>/dev/null || true)

    if (( ${#missing_paths[@]} > 0 )); then
        # Join with explicit single spaces so the result does not depend on
        # whatever IFS the surrounding script has set.
        printf -v joined '%s ' "${missing_paths[@]}"
        joined="${joined% }"
        record cron_missing_paths WARN "$joined" "Cron references missing path(s): $joined"
    else
        record cron_missing_paths OK "0 missing" "No missing script paths in root crontab"
    fi
}