#!/usr/bin/env bash
# =============================================================================
# vm-cleanup.sh — Hostinger VM Safe Periodic Cleanup
#
# Designed to be run manually or via cron. The script never prompts for
# confirmation: every step runs unattended, so use --dry-run first to
# preview what a run would do.
#
# Modes:
#   (default)        Standard cleanup — Docker build cache, crash-loop check,
#                    journal, apt, npm, .next/cache
#   --full           Deeper cleanup — adds: docker system prune, pnpm store,
#                    old log files, HOLD archived node_modules / .next
#   --dry-run        Print what would be done, make no changes
#   --quiet          Suppress terminal output (the log file is still written)
#   --install-cron   Install the recommended cron schedule for both scripts
#   --uninstall-cron Remove the installed cron jobs
#
# Each step is printed with a SAFE / CAREFUL / SKIP / DRY-RUN label, and
# every executed command is recorded in the log so you can audit what ran.
#
# Logs to: /var/log/vm-cleanup.log
# =============================================================================

# -E: ERR traps are inherited by functions; -e: exit on unhandled failure;
# -u: unset variables are errors; pipefail: a pipeline fails if any stage does.
set -Eeuo pipefail

# ── Config ───────────────────────────────────────────────────────────────────
LOG_FILE="/var/log/vm-cleanup.log"
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
# The companion health-check script is expected next to this one.
HEALTH_CHECK="$SCRIPT_DIR/vm-health-check.sh"
readonly LOG_FILE SCRIPT_PATH SCRIPT_DIR HEALTH_CHECK

# Paths that must NEVER be deleted even in --full mode.
# NOTE(review): nothing in this file reads this array — it is a reference
# list for maintainers, not an enforced guard.
# shellcheck disable=SC2034
readonly -a PROTECTED_PATHS=(
  "/opt/bytelyst/learning_ai_common_plat"
  "/opt/bytelyst/learning_ai_devops_tools"
  "/usr/local/lib/hermes-agent"
  "/usr/share/ollama"
  "/swapfile"
)

# node_modules dirs in active (non-HOLD) repos to never touch.
# NOTE(review): also reference-only; no step in this file consults it.
# shellcheck disable=SC2034
readonly -a ACTIVE_NODE_MODULES=(
  "/opt/bytelyst/learning_ai_common_plat/node_modules"
  "/opt/bytelyst/learning_ai_flowmonk/node_modules"
  "/opt/bytelyst/learning_ai_clock/node_modules"
  "/opt/bytelyst/learning_ai_notes/node_modules"
  "/opt/bytelyst/learning_ai_devops_tools/dashboard/node_modules"
  "/opt/bytelyst/learning_ai_invt_trdg/node_modules"
)

# ── Colour codes ─────────────────────────────────────────────────────────────
RED=$'\033[0;31m'
YELLOW=$'\033[1;33m'
GREEN=$'\033[0;32m'
CYAN=$'\033[0;36m'
BOLD=$'\033[1m'
DIM=$'\033[2m'
NC=$'\033[0m'

# ── Flags ────────────────────────────────────────────────────────────────────
FULL_MODE=false
DRY_RUN=false
INSTALL_CRON=false
UNINSTALL_CRON=false
QUIET=false

for arg in "$@"; do
  case "$arg" in
    --full)           FULL_MODE=true ;;
    --dry-run)        DRY_RUN=true ;;
    --install-cron)   INSTALL_CRON=true ;;
    --uninstall-cron) UNINSTALL_CRON=true ;;
    --quiet)          QUIET=true ;;
    *)
      # Refuse unknown flags: a mistyped --dry-run must not become a real run.
      printf '%sERROR: unknown option: %s%s\n' "$RED" "$arg" "$NC" >&2
      exit 2
      ;;
  esac
done

# ── Helpers ──────────────────────────────────────────────────────────────────

# log MESSAGE...
# Appends a UTC-timestamped line to $LOG_FILE (best effort) and, unless
# --quiet was given, echoes the bare message to stdout.
log() {
  local msg
  msg="[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*"
  # 2>/dev/null is placed first so an unwritable log file fails silently.
  echo "$msg" 2>/dev/null >> "$LOG_FILE" || true
  $QUIET || echo -e "$*"
}

# log_header TITLE
# Prints a section banner (unless --quiet) and records it in the log file.
log_header() {
  $QUIET || echo -e "\n${BOLD}${CYAN}── $1 ──────────────────────────────────────${NC}"
  echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] === $1 ===" 2>/dev/null >> "$LOG_FILE" || true
}

# log_step LABEL MESSAGE
# Prints one labelled step line to the terminal (nothing when --quiet).
# LABEL is SAFE, CAREFUL, SKIP or DRY; any other label is printed uncoloured
# instead of being dropped silently.
log_step() {
  local label="$1" msg="$2"
  case "$label" in
    SAFE)    $QUIET || echo -e "  ${GREEN}[SAFE]${NC}    $msg" ;;
    CAREFUL) $QUIET || echo -e "  ${YELLOW}[CAREFUL]${NC} $msg" ;;
    SKIP)    $QUIET || echo -e "  ${DIM}[SKIP]${NC}    $msg" ;;
    DRY)     $QUIET || echo -e "  ${CYAN}[DRY-RUN]${NC} $msg" ;;
    *)       $QUIET || echo -e "  [$label] $msg" ;;
  esac
}

# run_cmd LABEL "description" cmd args...
# Announces the step, then either prints the command (--dry-run) or runs it
# with stdout/stderr appended to $LOG_FILE. A failing command is recorded as
# a warning and never aborts the cleanup; run_cmd always returns 0.
run_cmd() {
  local label="$1" desc="$2"
  shift 2
  log_step "$label" "$desc"
  if $DRY_RUN; then
    log_step DRY "would run: $*"
    return 0
  fi
  log "[CMD] $*"
  local rc=0
  "$@" >> "$LOG_FILE" 2>&1 || rc=$?
  if (( rc != 0 )); then
    log "[WARN] command failed (exit $rc): $*"
  fi
  return 0
}

# Prints "used avail pcent" for the root filesystem, or nothing if df fails.
_root_disk_usage() {
  df -h / --output=used,avail,pcent 2>/dev/null | tail -1 | tr -s ' ' || true
}

disk_before=""

# Stores the current root filesystem usage in the global disk_before.
record_disk_before() {
  disk_before="$(_root_disk_usage)"
  disk_before="${disk_before:-unknown}"
}

# Prints (unless --quiet) and logs root filesystem usage before and after.
report_disk_delta() {
  local disk_after
  disk_after="$(_root_disk_usage)"
  disk_after="${disk_after:-unknown}"
  if ! $QUIET; then
    echo -e "\n  ${DIM}Before: $disk_before${NC}"
    echo -e "  ${GREEN}After:  $disk_after${NC}"
  fi
  log "[DISK] before=$disk_before after=$disk_after"
}

# ── Safety guard ─────────────────────────────────────────────────────────────

# Exits with status 1 unless the effective user is root.
require_root() {
  if [[ "$(id -u)" -ne 0 ]]; then
    echo -e "${RED}ERROR: This script must be run as root (use sudo)${NC}" >&2
    exit 1
  fi
}

# ── Cron install/uninstall ───────────────────────────────────────────────────

# _cron_select WHICH CRON_TAG
# Prints lines of the current user's crontab. WHICH=managed prints only the
# lines this script owns; WHICH=unmanaged prints everything else. A line is
# "managed" when it contains the tag or references either maintenance script.
# Prints nothing (status 0) when there is no crontab.
_cron_select() {
  local which="$1" cron_tag="$2"
  local -a grep_args=(-F -e "$cron_tag" -e "vm-health-check.sh" -e "vm-cleanup.sh")
  # Also match the real paths in case the scripts were renamed. Empty
  # patterns are skipped because `grep -e ""` would match every line.
  if [[ -n "${SCRIPT_PATH:-}" ]]; then grep_args+=(-e "$SCRIPT_PATH"); fi
  if [[ -n "${HEALTH_CHECK:-}" ]]; then grep_args+=(-e "$HEALTH_CHECK"); fi
  if [[ "$which" == "unmanaged" ]]; then grep_args=(-v "${grep_args[@]}"); fi
  crontab -l 2>/dev/null | grep "${grep_args[@]}" || true
}

# Installs the maintenance schedule into the current user's crontab.
# Idempotent: all previously managed lines are removed first, so repeated
# installs never duplicate jobs. Every comment line carries the tag so that
# uninstall can remove it as well.
do_install_cron() {
  echo -e "\n${BOLD}Installing cron schedule…${NC}\n"

  local cron_tag="# bytelyst-vm-maintenance"
  local existing
  existing="$(_cron_select unmanaged "$cron_tag")"

  # NOTE(review): the health-check schedule (every 6 hours) is an assumption;
  # confirm the intended frequency.
  # NOTE(review): the daily and the weekly job run the same command, so the
  # weekly entry is redundant unless a build-cache-only mode is added.
  # The new table is assembled in memory and handed to `crontab -`, so no
  # temporary file is left behind on failure.
  {
    if [[ -n "$existing" ]]; then printf '%s\n' "$existing"; fi
    cat <<EOF
$cron_tag
$cron_tag: health check every 6 hours
0 */6 * * * bash "$HEALTH_CHECK" 2>&1 | logger -t vm-health-check
$cron_tag: daily build cache prune at 03:00 UTC (always safe, never removes images)
0 3 * * * bash "$SCRIPT_PATH" --quiet 2>&1 | logger -t vm-cleanup
$cron_tag: weekly cleanup (Sunday 02:00 UTC) — logs, apt, npm, .next/cache, build cache
0 2 * * 0 bash "$SCRIPT_PATH" --quiet 2>&1 | logger -t vm-cleanup
$cron_tag: monthly full cleanup (1st of month 01:00 UTC) — adds pnpm store, docker system prune
0 1 1 * * bash "$SCRIPT_PATH" --full --quiet 2>&1 | logger -t vm-cleanup
EOF
  } | crontab -

  echo -e "  ${GREEN}✓ Cron jobs installed. Current schedule:${NC}"
  echo ""
  _cron_select managed "$cron_tag"
  echo ""
  echo -e "  View logs:  ${CYAN}tail -f $LOG_FILE${NC}"
  echo -e "  View cron:  ${CYAN}crontab -l${NC}"
  echo -e "  Remove:     ${CYAN}bash $SCRIPT_PATH --uninstall-cron${NC}"
}

# Removes every managed line (tag, tagged comments, job lines) from the
# current user's crontab and leaves all other entries untouched.
do_uninstall_cron() {
  local cron_tag="# bytelyst-vm-maintenance"
  local remaining
  # Captured before writing so the old table is fully read first.
  remaining="$(_cron_select unmanaged "$cron_tag")"
  if [[ -n "$remaining" ]]; then printf '%s\n' "$remaining"; fi | crontab -
  echo -e "  ${GREEN}✓ Cron jobs removed${NC}"
}

# ── Cleanup steps ─────────────────────────────────────────────────────────────

# Succeeds when the Docker daemon answers `docker info`.
_docker_available() {
  docker info &>/dev/null
}

# Prunes the Docker build cache, reporting its current size first.
step_docker_build_cache() {
  log_header "Docker Build Cache"
  if ! _docker_available; then
    log_step SKIP "Docker not running — skipping build cache prune"
    return
  fi
  local cache_size
  # Ask for tab-separated type/size: the default table has the size in a
  # later column, and older Docker versions print it as two words.
  cache_size="$(docker system df --format $'{{.Type}}\t{{.Size}}' 2>/dev/null \
    | awk -F '\t' '$1 == "Build Cache" {print $2}')" || cache_size=""
  run_cmd SAFE "Prune Docker build cache (currently ${cache_size:-?})" \
    docker builder prune -f
}

# Removes stopped containers, unused networks, dangling images ONLY.
# Does NOT remove images used by any existing container.
step_docker_system_prune() {
  log_header "Docker System Prune (dangling only)"
  if ! _docker_available; then
    log_step SKIP "Docker not running"
    return
  fi
  run_cmd SAFE "Remove stopped containers, unused networks, dangling images" \
    docker system prune -f
}

# Warns about containers whose restart count is 20 or more. Read-only: it
# prints suggested commands but changes nothing. The warning is printed even
# with --quiet so that it reaches the cron log.
step_docker_crash_loop_check() {
  log_header "Crash Loop Check"
  if ! _docker_available; then return; fi

  local -r threshold=20
  local looping=() ids=() name restarts c
  mapfile -t ids < <(docker ps -aq 2>/dev/null || true)

  if (( ${#ids[@]} > 0 )); then
    # RestartCount is only exposed by `docker inspect`; `docker ps --format`
    # has no such placeholder.
    while IFS=$'\t' read -r name restarts; do
      name="${name#/}"   # inspect reports names with a leading slash
      [[ -n "$name" ]] || continue
      [[ "$restarts" =~ ^[0-9]+$ ]] || continue
      if (( restarts >= threshold )); then looping+=("$name(${restarts}x)"); fi
    done < <(docker inspect --format $'{{.Name}}\t{{.RestartCount}}' "${ids[@]}" 2>/dev/null || true)
  fi

  if (( ${#looping[@]} > 0 )); then
    echo -e "  ${RED}${BOLD}⚠ CRASH LOOPS DETECTED — manual fix required:${NC}"
    for c in "${looping[@]}"; do
      echo -e "    ${RED}→${NC} $c"
      echo -e "      ${DIM}fix: docker logs ${c%%(*)} | tail -20${NC}"
      echo -e "      ${DIM}stop loop: docker update --restart=no ${c%%(*)}${NC}"
    done
    log "[WARN] crash-looping containers: ${looping[*]}"
  else
    log_step SAFE "No crash-looping containers"
  fi
}

# Shrinks the systemd journal to 200MB and drops entries older than 7 days.
step_journal() {
  log_header "Journal Logs"
  run_cmd SAFE "Vacuum journal to 200MB" \
    journalctl --vacuum-size=200M
  run_cmd SAFE "Vacuum journal older than 7 days" \
    journalctl --vacuum-time=7d
}

# Empties the apt package cache.
step_apt_cache() {
  log_header "APT Cache"
  run_cmd SAFE "Clean apt package cache" \
    apt-get clean
}

# Clears the npm cache when npm is installed.
step_npm_cache() {
  log_header "NPM Cache"
  if command -v npm &>/dev/null; then
    run_cmd SAFE "Clean npm cache" \
      npm cache clean --force
  else
    log_step SKIP "npm not found"
  fi
}

# Deletes the cache/ directory inside every .next directory found under
# /opt/bytelyst. Only .next/cache is removed, never .next itself.
step_next_cache() {
  log_header ".next/cache Directories"
  local count=0 next_dir cache_dir
  while IFS= read -r -d '' next_dir; do
    cache_dir="$next_dir/cache"
    [[ -d "$cache_dir" ]] || continue
    run_cmd SAFE "Remove $cache_dir" rm -rf -- "$cache_dir"
    # Not (( count++ )): that returns status 1 when count is 0 and would
    # abort the script under `set -e`.
    count=$(( count + 1 ))
  done < <(find /opt/bytelyst -maxdepth 7 -type d -name ".next" -print0 2>/dev/null || true)
  if (( count == 0 )); then log_step SKIP "No .next/cache dirs found"; fi
}

# Prunes unreferenced packages from the pnpm store when pnpm is installed.
step_pnpm_store() {
  log_header "PNPM Store"
  if command -v pnpm &>/dev/null; then
    run_cmd SAFE "Prune unreferenced packages from pnpm store" \
      pnpm store prune
  else
    log_step SKIP "pnpm not found"
  fi
}

# Compresses uncompressed .1 rotations that logrotate missed, then removes
# *.gz files under /var/log that are older than 30 days.
step_old_logs() {
  log_header "Old Log Files"
  local compressed=0 f old_log
  for f in /var/log/syslog.1 /var/log/kern.log.1 /var/log/ufw.log.1; do
    if [[ -f "$f" && ! -f "${f}.gz" ]]; then
      run_cmd SAFE "Compress $f" gzip -9 -- "$f"
      compressed=$(( compressed + 1 ))
    fi
  done
  if (( compressed == 0 )); then
    log_step SKIP "No uncompressed rotations to compress"
  fi

  while IFS= read -r -d '' old_log; do
    run_cmd CAREFUL "Remove old log: $old_log" rm -f -- "$old_log"
  done < <(find /var/log -type f -name "*.gz" -mtime +30 -print0 2>/dev/null || true)
}

# Deletes node_modules and .next directories under /opt/bytelyst/HOLD.
# The project sources are left in place.
step_hold_cleanup() {
  log_header "HOLD Archived Projects"
  local total_mb=0 found=0 nm size next_dir

  # -prune stops find descending into a match, so a nested node_modules is
  # neither visited after its parent is gone nor counted twice in the total.
  while IFS= read -r -d '' nm; do
    size="$(du -sm -- "$nm" 2>/dev/null | cut -f1)" || size=0
    [[ "$size" =~ ^[0-9]+$ ]] || size=0
    run_cmd CAREFUL "Delete archived node_modules: $nm (~${size}MB)" rm -rf -- "$nm"
    total_mb=$(( total_mb + size ))
    found=$(( found + 1 ))
  done < <(
    find /opt/bytelyst/HOLD -maxdepth 4 -type d -name "node_modules" \
      -prune -print0 2>/dev/null || true
  )

  if (( found == 0 )); then
    log_step SKIP "HOLD node_modules already clean"
  elif $DRY_RUN; then
    log "[INFO] Would free ~${total_mb}MB from HOLD node_modules"
  else
    log "[INFO] Freed ~${total_mb}MB from HOLD node_modules"
  fi

  # .next build artifacts in HOLD
  while IFS= read -r -d '' next_dir; do
    run_cmd CAREFUL "Delete archived .next: $next_dir" rm -rf -- "$next_dir"
  done < <(
    find /opt/bytelyst/HOLD -maxdepth 6 -type d -name ".next" \
      -prune -print0 2>/dev/null || true
  )
}

# ── Main ─────────────────────────────────────────────────────────────────────
# Entry point. The cron management modes replace a cleanup run entirely;
# they, like the cleanup itself, require root.
main() {
  local banner_mode log_mode banner_dry step
  local -a steps

  if $INSTALL_CRON; then
    require_root
    do_install_cron
    exit 0
  fi
  if $UNINSTALL_CRON; then
    require_root
    do_uninstall_cron
    exit 0
  fi

  require_root

  if $FULL_MODE; then
    banner_mode="FULL"
    log_mode="full"
  else
    banner_mode="STANDARD"
    log_mode="standard"
  fi

  if ! $QUIET; then
    banner_dry=""
    if $DRY_RUN; then banner_dry=" (DRY-RUN)"; fi
    echo -e "\n${BOLD}VM Cleanup — $(hostname) — ${banner_mode}${banner_dry}${NC}"
    echo -e "${DIM}$(date -u '+%Y-%m-%d %H:%M UTC')${NC}"
  fi

  log "[START] mode=${log_mode} dry=$DRY_RUN"
  record_disk_before

  # ── WEEKLY (always run) ────────────────────────────────────────────────────
  steps=(
    step_docker_build_cache
    step_docker_crash_loop_check
    step_journal
    step_apt_cache
    step_npm_cache
    step_next_cache
  )

  # ── MONTHLY (only with --full) ─────────────────────────────────────────────
  if $FULL_MODE; then
    steps+=(
      step_docker_system_prune
      step_pnpm_store
      step_old_logs
      step_hold_cleanup
    )
  fi

  for step in "${steps[@]}"; do
    "$step"
  done

  # ── Final report ───────────────────────────────────────────────────────────
  report_disk_delta

  if ! $QUIET; then
    echo -e "\n${GREEN}${BOLD}✓ Cleanup complete${NC}"
    echo -e "  Log: ${CYAN}tail -50 $LOG_FILE${NC}"
    # The health check only runs for interactive (non-quiet) invocations,
    # and its failure never fails the cleanup.
    if [[ -f "$HEALTH_CHECK" ]]; then
      echo ""
      echo -e "${DIM}Running health check…${NC}"
      bash "$HEALTH_CHECK" || true
    fi
  fi

  log "[END] cleanup complete"
}

main "$@"