bytelyst-devops-tools/scripts/VMs/HostingerVM/vm-cleanup.sh
Hermes VM 0a2d303f93 add HostingerVM health-check and cleanup scripts
- vm-health-check.sh: read-only checks for disk, load, RAM, swap,
  Docker containers (crash-loops + healthchecks), build cache, journal.
  Flags: --quiet, --json, --notify (Telegram). Exit 0/1/2 = OK/WARN/CRIT.

- vm-cleanup.sh: safe periodic cleanup.
  Default (weekly): build cache, journal, apt, npm, .next/cache.
  --full (monthly): adds docker system prune, pnpm store, old logs, HOLD cleanup.
  --dry-run, --install-cron, --uninstall-cron.
  Logs to /var/log/vm-cleanup.log.

Related: docs/hostinger-vm-maintenance.md, scripts/VMs/HostingerVM/CRON_SETUP.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 18:53:20 +00:00

378 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# vm-cleanup.sh — Hostinger VM Safe Periodic Cleanup
#
# Designed to be run manually or via cron. The script never prompts for
# confirmation: every step runs non-interactively and is labelled in the
# output. Run with --dry-run first to preview what would be removed.
#
# Modes:
# (default) Weekly safe cleanup — build cache, apt, npm, journal, .next/cache
# --full Monthly deeper cleanup — adds: docker system prune, pnpm store prune,
# old log files, node_modules/.next removal under /opt/bytelyst/HOLD
# --dry-run Print what would be done, make no changes
# --install-cron Install the recommended cron schedule for both scripts
# --uninstall-cron Remove the installed cron jobs
#
# Every step is tagged SAFE / CAREFUL / SKIP / DRY-RUN in the output so
# you can audit what ran.
#
# Logs to: /var/log/vm-cleanup.log
# =============================================================================
set -Eeuo pipefail
# ── Config ───────────────────────────────────────────────────────────────────
# Everything in this section is a constant and is marked readonly so that a
# later typo cannot silently repoint a path or colour.

# File that every log helper appends to.
LOG_FILE="/var/log/vm-cleanup.log"
# Absolute path of this script; it is written into the crontab by --install-cron.
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
# Sibling health-check script, expected to live next to this file.
HEALTH_CHECK="$SCRIPT_DIR/vm-health-check.sh"
readonly LOG_FILE SCRIPT_PATH SCRIPT_DIR HEALTH_CHECK

# Paths that must NEVER be deleted even in --full mode.
# NOTE(review): this list is not referenced by any code visible in this file
# (hence the SC2034 suppression) — it documents intent but enforces nothing.
# shellcheck disable=SC2034
readonly PROTECTED_PATHS=(
  "/opt/bytelyst/learning_ai_common_plat"
  "/opt/bytelyst/learning_ai_devops_tools"
  "/usr/local/lib/hermes-agent"
  "/usr/share/ollama"
  "/swapfile"
)

# node_modules dirs in active (non-HOLD) repos to never touch.
# NOTE(review): like PROTECTED_PATHS, not referenced by the visible code.
# shellcheck disable=SC2034
readonly ACTIVE_NODE_MODULES=(
  "/opt/bytelyst/learning_ai_common_plat/node_modules"
  "/opt/bytelyst/learning_ai_flowmonk/node_modules"
  "/opt/bytelyst/learning_ai_clock/node_modules"
  "/opt/bytelyst/learning_ai_notes/node_modules"
  "/opt/bytelyst/learning_ai_devops_tools/dashboard/node_modules"
  "/opt/bytelyst/learning_ai_invt_trdg/node_modules"
)

# ── Colour codes ─────────────────────────────────────────────────────────────
# ANSI escape sequences used by the echo -e calls throughout the script.
readonly RED=$'\033[0;31m'
readonly YELLOW=$'\033[1;33m'
readonly GREEN=$'\033[0;32m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly DIM=$'\033[2m'
readonly NC=$'\033[0m'
# ── Flags ────────────────────────────────────────────────────────────────────
FULL_MODE=false
DRY_RUN=false
INSTALL_CRON=false
UNINSTALL_CRON=false
QUIET=false

#######################################
# Translate command-line switches into the global flag variables above.
# Globals:   FULL_MODE, DRY_RUN, INSTALL_CRON, UNINSTALL_CRON, QUIET (written)
# Arguments: the script's positional parameters
# Outputs:   an error and usage line on stderr for an unrecognised argument
# Returns:   exits 2 on an unrecognised argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --full) FULL_MODE=true ;;
      --dry-run) DRY_RUN=true ;;
      --install-cron) INSTALL_CRON=true ;;
      --uninstall-cron) UNINSTALL_CRON=true ;;
      --quiet) QUIET=true ;;
      *)
        # Refuse to continue: silently ignoring a mistyped --dry-run would
        # turn an intended preview into a real, destructive run.
        printf 'ERROR: unknown option: %s\n' "$arg" >&2
        printf 'Usage: %s [--full] [--dry-run] [--quiet] [--install-cron | --uninstall-cron]\n' \
          "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
# ── Helpers ──────────────────────────────────────────────────────────────────
#######################################
# Append a UTC-timestamped line to the log file and, unless quiet, echo the
# raw message to stdout.
# Globals:   LOG_FILE (read), QUIET (read)
# Arguments: message words (joined with spaces)
# Outputs:   the message on stdout when QUIET is false
#######################################
log() {
  local msg
  msg="[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] $*"
  # Logging is best-effort. stderr is redirected on the enclosing group so it
  # is already silenced when the append redirection is attempted; with
  # `>> file 2>/dev/null` the shell reports an unwritable file before the
  # 2>/dev/null takes effect.
  { echo "$msg" >> "$LOG_FILE"; } 2>/dev/null || true
  if ! $QUIET; then
    echo -e "$*"
  fi
}
#######################################
# Print a coloured section banner (unless quiet) and record the section
# title in the log file with a UTC timestamp.
# Globals:   LOG_FILE, QUIET, BOLD, CYAN, NC (read)
# Arguments: $1 - section title
#######################################
log_header() {
  if ! $QUIET; then
    echo -e "\n${BOLD}${CYAN}── $1 ──────────────────────────────────────${NC}"
  fi
  # Best-effort append. stderr is silenced on the group so that an unwritable
  # log file does not leak a redirection error to the terminal.
  {
    echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] === $1 ===" >> "$LOG_FILE"
  } 2>/dev/null || true
}
#######################################
# Print one step line prefixed with a coloured tag. Nothing is printed when
# QUIET is true or when the label is not one of the four known ones.
# Globals:   QUIET, GREEN, YELLOW, DIM, CYAN, NC (read)
# Arguments: $1 - label: SAFE | CAREFUL | SKIP | DRY
#            $2 - message text
#######################################
log_step() {
  local label="$1" msg="$2"
  local tint tag
  # Map the label to its colour and the bracketed tag shown to the user.
  case "$label" in
    SAFE)
      tint="$GREEN"
      tag="[SAFE]"
      ;;
    CAREFUL)
      tint="$YELLOW"
      tag="[CAREFUL]"
      ;;
    SKIP)
      tint="$DIM"
      tag="[SKIP]"
      ;;
    DRY)
      tint="$CYAN"
      tag="[DRY-RUN]"
      ;;
    *)
      return 0
      ;;
  esac
  if ! $QUIET; then
    echo -e " ${tint}${tag}${NC} $msg"
  fi
}
#######################################
# Announce a step and run its command, sending the command's output to the
# log file. In dry-run mode the command is only printed, never executed.
# A failing command never aborts the cleanup, but its exit status is now
# recorded instead of being discarded.
# Usage:     run_cmd LABEL "description" cmd args...
# Globals:   DRY_RUN, LOG_FILE (read)
# Arguments: $1 - step label passed to log_step
#            $2 - human-readable description
#            $3… - command and its arguments
# Returns:   always 0
#######################################
run_cmd() {
  local label="$1" desc="$2"
  shift 2
  log_step "$label" "$desc"
  if $DRY_RUN; then
    log_step DRY "would run: $*"
    return 0
  fi
  log "[CMD] $*"
  local rc=0
  "$@" >> "$LOG_FILE" 2>&1 || rc=$?
  if (( rc != 0 )); then
    # Keep going (best-effort cleanup) but leave a trace of the failure.
    log "[WARN] command exited with status $rc: $*"
  fi
  return 0
}
# Root-filesystem usage captured before any cleanup step runs.
disk_before=""

#######################################
# Store the current used/avail/percent figures for / in disk_before.
# Globals:   disk_before (written)
#######################################
record_disk_before() {
  local snapshot
  # Last line of df's output is the data row; squeeze runs of spaces.
  snapshot=$(df -h / --output=used,avail,pcent | tail -n 1 | tr -s ' ')
  disk_before="$snapshot"
}
#######################################
# Show root-filesystem usage before and after the cleanup and log both.
# Globals:   disk_before, QUIET, DIM, GREEN, NC (read)
# Outputs:   two lines on stdout unless QUIET is true
#######################################
report_disk_delta() {
  local disk_after
  disk_after=$(df -h / --output=used,avail,pcent | tail -n 1 | tr -s ' ')
  $QUIET || {
    echo -e "\n ${DIM}Before: $disk_before${NC}"
    echo -e " ${GREEN}After: $disk_after${NC}"
  }
  log "[DISK] before=$disk_before after=$disk_after"
}
# ── Safety guard ─────────────────────────────────────────────────────────────
#######################################
# Abort the script unless the effective user id is 0.
# Globals:   RED, NC (read)
# Outputs:   an error line on stderr when not root
# Returns:   0 when root; otherwise exits the script with status 1
#######################################
require_root() {
  local uid
  uid="$(id -u)"
  [[ "$uid" -eq 0 ]] && return 0
  echo -e "${RED}ERROR: This script must be run as root (use sudo)${NC}" >&2
  exit 1
}
# ── Cron install/uninstall ───────────────────────────────────────────────────
#######################################
# Install the managed maintenance block into the current user's crontab.
#
# Fixes relative to the previous version:
#   * The 07:00 entry now runs the health-check script; it used to invoke
#     this cleanup script even though its label said "health check".
#   * Installing twice no longer duplicates jobs: every line written by an
#     earlier install (tag line, descriptive comments, job lines) is stripped
#     before the new block is appended.
#   * Every comment line in the block carries the tag, so filtering on the
#     tag removes the whole block.
#   * The temp file is removed even when `crontab` rejects the new table.
#
# NOTE(review): the 03:00 entry was labelled "build cache prune" but ran the
# whole standard cleanup; this script has no build-cache-only mode, so the
# command is kept and only the label corrected. Confirm the intended schedule.
#
# Globals:   SCRIPT_PATH, HEALTH_CHECK, LOG_FILE, BOLD, GREEN, CYAN, NC (read)
# Returns:   non-zero if the new crontab could not be installed
#######################################
do_install_cron() {
  echo -e "\n${BOLD}Installing cron schedule…${NC}\n"
  local cron_tag="# bytelyst-vm-maintenance"
  local tmp_cron rc=0
  # Fixed-string patterns identifying lines written by this or an older install.
  local -a managed=(
    -e "$cron_tag"
    -e "vm-health-check.sh"
    -e "vm-cleanup.sh"
    -e "# Daily health check at 07:00 UTC"
    -e "# Daily build cache prune at 03:00 UTC"
    -e "# Weekly cleanup (Sunday 02:00 UTC)"
    -e "# Monthly full cleanup (1st of month 01:00 UTC)"
  )
  tmp_cron=$(mktemp)
  # Export existing crontab minus our managed block. A missing crontab or an
  # empty grep result is fine, hence the || true.
  crontab -l 2>/dev/null | grep -v -F "${managed[@]}" > "$tmp_cron" || true
  cat >> "$tmp_cron" <<EOF
$cron_tag — DO NOT EDIT this block manually, use --install-cron / --uninstall-cron
$cron_tag: Daily health check at 07:00 UTC (read-only, sends Telegram alert on WARNING/CRITICAL)
0 7 * * * bash "$HEALTH_CHECK" --quiet --notify 2>&1 | logger -t vm-health-check
$cron_tag: Daily standard cleanup at 03:00 UTC (never removes images)
0 3 * * * bash "$SCRIPT_PATH" --quiet 2>&1 | logger -t vm-cleanup
$cron_tag: Weekly cleanup (Sunday 02:00 UTC) — logs, apt, npm, .next/cache, build cache
0 2 * * 0 bash "$SCRIPT_PATH" --quiet 2>&1 | logger -t vm-cleanup
$cron_tag: Monthly full cleanup (1st of month 01:00 UTC) — adds pnpm store, docker system prune
0 1 1 * * bash "$SCRIPT_PATH" --full --quiet 2>&1 | logger -t vm-cleanup
EOF
  crontab "$tmp_cron" || rc=$?
  rm -f -- "$tmp_cron"
  if (( rc != 0 )); then
    echo -e "${RED:-}ERROR: crontab rejected the new schedule (exit $rc)${NC}" >&2
    return "$rc"
  fi
  echo -e " ${GREEN}✓ Cron jobs installed. Current schedule:${NC}"
  echo ""
  crontab -l | grep -A20 "$cron_tag" || true
  echo ""
  echo -e " View logs: ${CYAN}tail -f $LOG_FILE${NC}"
  echo -e " View cron: ${CYAN}crontab -l${NC}"
  echo -e " Remove: ${CYAN}bash $SCRIPT_PATH --uninstall-cron${NC}"
}
#######################################
# Remove every line the installer wrote from the current user's crontab and
# leave all other entries untouched.
#
# Fixes relative to the previous version: the descriptive comment lines of
# the managed block are stripped as well (they used to be left behind), a
# single filter pass replaces the two-file shuffle, and the temp file is
# removed even when `crontab` fails.
#
# Globals:   GREEN, NC (read)
# Returns:   non-zero if the cleaned crontab could not be installed
#######################################
do_uninstall_cron() {
  local cron_tag="# bytelyst-vm-maintenance"
  local tmp_cron rc=0
  # Fixed strings: the tag, the job lines (matched by script name), and the
  # untagged comment lines written by the installer.
  local -a managed=(
    -e "$cron_tag"
    -e "vm-health-check.sh"
    -e "vm-cleanup.sh"
    -e "# Daily health check at 07:00 UTC"
    -e "# Daily build cache prune at 03:00 UTC"
    -e "# Weekly cleanup (Sunday 02:00 UTC)"
    -e "# Monthly full cleanup (1st of month 01:00 UTC)"
  )
  tmp_cron=$(mktemp)
  # A missing crontab or a filter that matches every line both leave an
  # empty file, which is then installed as an empty crontab.
  crontab -l 2>/dev/null | grep -v -F "${managed[@]}" > "$tmp_cron" || true
  crontab "$tmp_cron" || rc=$?
  rm -f -- "$tmp_cron"
  if (( rc != 0 )); then
    echo -e "${RED:-}ERROR: crontab rejected the cleaned schedule (exit $rc)${NC}" >&2
    return "$rc"
  fi
  echo -e " ${GREEN}✓ Cron jobs removed${NC}"
}
# ── Cleanup steps ─────────────────────────────────────────────────────────────
#######################################
# Prune the Docker build cache, showing its current size in the step line.
#
# Fix: the size used to be taken from awk field 3 of the human-readable
# `docker system df` table, which on the "Build Cache" row is the TOTAL
# count, not the size. The size is now requested explicitly via --format.
# An unavailable or empty value is shown as "?".
#
# Skips with a SKIP line when `docker info` fails.
#######################################
step_docker_build_cache() {
  log_header "Docker Build Cache"
  if ! docker info &>/dev/null; then
    log_step SKIP "Docker not running — skipping build cache prune"
    return
  fi
  local cache_size
  # Tab-separated "<type>\t<size>" rows; the $'…' quoting passes a real tab.
  cache_size=$(
    docker system df --format $'{{.Type}}\t{{.Size}}' 2>/dev/null \
      | awk -F'\t' '$1 == "Build Cache" { print $2; exit }' || true
  )
  run_cmd SAFE "Prune Docker build cache (currently ${cache_size:-?})" \
    docker builder prune -f
}
#######################################
# Run `docker system prune -f` through run_cmd, or emit a SKIP line when
# `docker info` fails. Per the step description this targets stopped
# containers, unused networks and dangling images only.
#######################################
step_docker_system_prune() {
  log_header "Docker System Prune (dangling only)"
  if docker info &>/dev/null 2>&1; then
    run_cmd SAFE "Remove stopped containers, unused networks, dangling images" \
      docker system prune -f
  else
    log_step SKIP "Docker not running"
  fi
}
#######################################
# Report containers whose restart count is 20 or more. Nothing is changed;
# the function only prints suggested commands and logs a warning.
#
# Fix: the restart count used to be requested via `docker ps --format`, which
# has no RestartCount placeholder, so the query failed, the error was
# silenced, and the check always reported "no crash loops". The count is now
# read from `docker inspect`, which exposes .RestartCount per container.
#
# Returns silently when `docker info` fails.
#######################################
step_docker_crash_loop_check() {
  log_header "Crash Loop Check"
  if ! docker info &>/dev/null; then return; fi
  local -a ids=() looping=()
  local name restarts c
  mapfile -t ids < <(docker ps -aq 2>/dev/null || true)
  if (( ${#ids[@]} > 0 )); then
    while IFS=$'\t' read -r name restarts; do
      name="${name#/}" # docker inspect prefixes container names with "/"
      [[ -n "$name" ]] || continue
      # Treat a missing or non-numeric count as zero rather than letting the
      # arithmetic comparison below fail.
      [[ "$restarts" =~ ^[0-9]+$ ]] || restarts=0
      if (( restarts >= 20 )); then looping+=("$name(${restarts}x)"); fi
    done < <(
      docker inspect --format $'{{.Name}}\t{{.RestartCount}}' "${ids[@]}" \
        2>/dev/null || true
    )
  fi
  if (( ${#looping[@]} > 0 )); then
    echo -e " ${RED}${BOLD}⚠ CRASH LOOPS DETECTED — manual fix required:${NC}"
    for c in "${looping[@]}"; do
      # ${c%%(*} strips the "(Nx)" suffix to recover the bare container name.
      echo -e " ${RED}${NC} $c"
      echo -e " ${DIM}fix: docker logs ${c%%(*)} | tail -20${NC}"
      echo -e " ${DIM}stop loop: docker update --restart=no ${c%%(*)}${NC}"
    done
    log "[WARN] crash-looping containers: ${looping[*]}"
  else
    log_step SAFE "No crash-looping containers"
  fi
}
#######################################
# Shrink the systemd journal in two passes: first by total size, then by age.
# Both commands go through run_cmd.
#######################################
step_journal() {
  log_header "Journal Logs"
  local -a by_size=(journalctl --vacuum-size=200M)
  local -a by_age=(journalctl --vacuum-time=7d)
  run_cmd SAFE "Vacuum journal to 200MB" "${by_size[@]}"
  run_cmd SAFE "Vacuum journal older than 7 days" "${by_age[@]}"
}
#######################################
# Empty the apt package cache by running `apt-get clean` through run_cmd.
#######################################
step_apt_cache() {
  local -a clean_cmd=(apt-get clean)
  log_header "APT Cache"
  run_cmd SAFE "Clean apt package cache" "${clean_cmd[@]}"
}
#######################################
# Clear the npm cache when npm is installed.
# When npm is missing a SKIP line is now printed, matching step_pnpm_store,
# instead of showing a section header followed by nothing.
#######################################
step_npm_cache() {
  log_header "NPM Cache"
  if command -v npm &>/dev/null; then
    run_cmd SAFE "Clean npm cache" \
      npm cache clean --force
  else
    log_step SKIP "npm not found"
  fi
}
#######################################
# Delete every `.next/cache` directory below a root directory. Only the
# cache subdirectory is removed; the rest of `.next` is left in place.
#
# Fixes:
#   * `(( count++ ))` evaluates to 0 on the first iteration, which is a
#     failing status and aborted the whole script under `set -e` right after
#     the first directory was removed. Plain assignment is used instead.
#   * Paths are read NUL-delimited, and -maxdepth is placed before the tests
#     as GNU find expects.
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - optional search root (default: /opt/bytelyst)
#######################################
step_next_cache() {
  local root="${1:-/opt/bytelyst}"
  log_header ".next/cache Directories"
  # Only delete .next/cache (webpack/babel/tsbuild cache), NOT .next/standalone (prod build)
  local count=0 cache_dir
  while IFS= read -r -d '' cache_dir; do
    log_step SAFE "Remove $cache_dir"
    if ! $DRY_RUN; then rm -rf -- "$cache_dir"; fi
    count=$(( count + 1 ))
  done < <(
    # .next may sit up to 7 levels down, so its cache child is at most at 8.
    # -prune stops find from walking into directories about to be deleted.
    find "$root" -maxdepth 8 -type d -path '*/.next/cache' -prune -print0 \
      2>/dev/null || true
  )
  if (( count == 0 )); then log_step SKIP "No .next/cache dirs found"; fi
}
#######################################
# Prune unreferenced packages from the pnpm store, or print a SKIP line
# when pnpm is not on PATH.
#######################################
step_pnpm_store() {
  log_header "PNPM Store"
  if ! command -v pnpm &>/dev/null; then
    log_step SKIP "pnpm not found"
    return
  fi
  run_cmd SAFE "Prune unreferenced packages from pnpm store" \
    pnpm store prune
}
#######################################
# Compress leftover uncompressed `.1` rotations of syslog, kern.log and
# ufw.log, then remove `*.gz` files older than 30 days from the log directory.
#
# Fixes:
#   * `(( count++ ))` returns a failing status when count is 0 and aborted
#     the script under `set -e` right after the first file was compressed.
#   * File names are read NUL-delimited and passed after `--`.
#
# Arguments: $1 - optional log directory (default: /var/log)
#######################################
step_old_logs() {
  local log_dir="${1:-/var/log}"
  log_header "Old Log Files"
  # Compress any uncompressed .1 rotations that logrotate missed
  local count=0 f old_log
  for f in "$log_dir/syslog.1" "$log_dir/kern.log.1" "$log_dir/ufw.log.1"; do
    if [[ -f "$f" && ! -f "${f}.gz" ]]; then
      run_cmd SAFE "Compress $f" gzip -9 -- "$f"
      count=$(( count + 1 ))
    fi
  done
  # Remove log rotations older than 30 days
  while IFS= read -r -d '' old_log; do
    run_cmd CAREFUL "Remove old log: $old_log" rm -f -- "$old_log"
  done < <(
    find "$log_dir" -type f -name '*.gz' -mtime +30 -print0 2>/dev/null || true
  )
  if (( count == 0 )); then log_step SKIP "No uncompressed rotations to compress"; fi
}
#######################################
# Remove regenerable artifacts (node_modules and .next directories) from
# archived projects under the HOLD directory. Source code is left in place.
#
# Fixes:
#   * `(( found++ ))` returns a failing status when found is 0 and aborted
#     the script under `set -e` after the first directory.
#   * An empty or non-numeric `du` result no longer breaks the size sum.
#   * -prune stops find from listing directories nested inside one that is
#     about to be deleted.
#   * The summary says "Would free" in dry-run mode, where nothing is removed.
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - optional HOLD root (default: /opt/bytelyst/HOLD)
#######################################
step_hold_cleanup() {
  local hold_root="${1:-/opt/bytelyst/HOLD}"
  log_header "HOLD Archived Projects"
  # node_modules in HOLD are safe to delete — code stays, can be reinstalled
  local total_freed=0 found=0
  local nm size next_dir
  while IFS= read -r -d '' nm; do
    size=$(du -sm -- "$nm" 2>/dev/null | cut -f1) || size=0
    [[ "$size" =~ ^[0-9]+$ ]] || size=0
    run_cmd CAREFUL "Delete archived node_modules: $nm (~${size}MB)" rm -rf -- "$nm"
    total_freed=$(( total_freed + size ))
    found=$(( found + 1 ))
  done < <(
    find "$hold_root" -maxdepth 4 -type d -name "node_modules" -prune -print0 \
      2>/dev/null || true
  )
  if (( found == 0 )); then
    log_step SKIP "HOLD node_modules already clean"
  elif $DRY_RUN; then
    log "[INFO] Would free ~${total_freed}MB from HOLD node_modules"
  else
    log "[INFO] Freed ~${total_freed}MB from HOLD node_modules"
  fi
  # .next build artifacts in HOLD
  while IFS= read -r -d '' next_dir; do
    run_cmd CAREFUL "Delete archived .next: $next_dir" rm -rf -- "$next_dir"
  done < <(
    find "$hold_root" -maxdepth 6 -type d -name ".next" -prune -print0 \
      2>/dev/null || true
  )
}
# ── Main ─────────────────────────────────────────────────────────────────────
# Cron management modes are handled first and exit without running any
# cleanup step. Both still call require_root.
if $INSTALL_CRON; then require_root; do_install_cron; exit 0; fi
if $UNINSTALL_CRON; then require_root; do_uninstall_cron; exit 0; fi
require_root

# Banner (interactive runs only).
if ! $QUIET; then
  if $FULL_MODE; then MODE="FULL"; else MODE="STANDARD"; fi
  if $DRY_RUN; then DRY=" (DRY-RUN)"; else DRY=""; fi
  # Separator added: hostname and mode used to be printed run together.
  echo -e "\n${BOLD}VM Cleanup — $(hostname) — ${MODE}${DRY}${NC}"
  echo -e "${DIM}$(date -u '+%Y-%m-%d %H:%M UTC')${NC}"
fi
if $FULL_MODE; then _mode="full"; else _mode="standard"; fi
log "[START] mode=${_mode} dry=$DRY_RUN"
record_disk_before

# ── WEEKLY (always run) ──────────────────────────────────────────────────────
step_docker_build_cache
step_docker_crash_loop_check
step_journal
step_apt_cache
step_npm_cache
step_next_cache

# ── MONTHLY (only with --full) ───────────────────────────────────────────────
if $FULL_MODE; then
  step_docker_system_prune
  step_pnpm_store
  step_old_logs
  step_hold_cleanup
fi

# ── Final report ─────────────────────────────────────────────────────────────
report_disk_delta
if ! $QUIET; then
  echo -e "\n${GREEN}${BOLD}✓ Cleanup complete${NC}"
  echo -e " Log: ${CYAN}tail -50 $LOG_FILE${NC}"
  # Follow an interactive run with the health check when the sibling script
  # exists; its exit status is deliberately ignored here.
  if [[ -f "$HEALTH_CHECK" ]]; then
    echo ""
    echo -e "${DIM}Running health check…${NC}"
    bash "$HEALTH_CHECK" || true
  fi
fi
log "[END] cleanup complete"