- vm-health-check.sh: read-only checks for disk, load, RAM, swap, Docker containers (crash-loops + healthchecks), build cache, journal. Flags: --quiet, --json, --notify (Telegram). Exit 0/1/2 = OK/WARN/CRIT. - vm-cleanup.sh: safe periodic cleanup. Default (weekly): build cache, journal, apt, npm, .next/cache. --full (monthly): adds docker system prune, pnpm store, old logs, HOLD cleanup. --dry-run, --install-cron, --uninstall-cron. Logs to /var/log/vm-cleanup.log. Related: docs/hostinger-vm-maintenance.md, scripts/VMs/HostingerVM/CRON_SETUP.md Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
378 lines
13 KiB
Bash
Executable File
378 lines
13 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# =============================================================================
|
|
# vm-cleanup.sh — Hostinger VM Safe Periodic Cleanup
|
|
#
|
|
# Designed to be run manually or via cron. It is fully non-interactive and
# never prompts for confirmation. Steps labelled CAREFUL (old log rotations,
# HOLD project artefacts) run only in --full mode.
|
|
#
|
|
# Modes:
|
|
# (default) Weekly safe cleanup — build cache, apt, npm, journal, .next/cache
|
|
# --full Monthly deeper cleanup — adds: docker system prune (stopped
# containers, unused networks, dangling images), pnpm store prune,
# old log files, HOLD project node_modules/.next removal
|
|
# --dry-run Print what would be done, make no changes
|
|
# --install-cron Install the recommended cron schedule for both scripts
|
|
# --uninstall-cron Remove the installed cron jobs
|
|
#
|
|
# Every step is tagged SAFE / CAREFUL / SKIP / DRY-RUN in the output
# so you can audit what ran.
|
|
#
|
|
# Logs to: /var/log/vm-cleanup.log
|
|
# =============================================================================
|
|
# Strict mode: -E lets ERR traps propagate into functions, -e aborts on an
# unhandled failure, -u rejects unset variables, pipefail surfaces a failure
# in any stage of a pipeline.
set -Eeuo pipefail

# ── Config ───────────────────────────────────────────────────────────────────
# These paths never change during a run, so they are marked readonly to catch
# accidental reassignment.
readonly LOG_FILE="/var/log/vm-cleanup.log"

# Absolute, symlink-resolved location of this script; written into the cron
# entries and used to locate the sibling health-check script.
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
readonly SCRIPT_PATH
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
readonly SCRIPT_DIR
readonly HEALTH_CHECK="$SCRIPT_DIR/vm-health-check.sh"
|
|
|
|
# Paths that must NEVER be deleted even in --full mode.
# NOTE(review): this list is declarative only — nothing in the visible script
# reads it (hence the SC2034 suppression), so it documents intent rather than
# enforcing it. Marked readonly so it cannot be altered at runtime.
# shellcheck disable=SC2034
readonly -a PROTECTED_PATHS=(
  "/opt/bytelyst/learning_ai_common_plat"
  "/opt/bytelyst/learning_ai_devops_tools"
  "/usr/local/lib/hermes-agent"
  "/usr/share/ollama"
  "/swapfile"
)
|
|
|
|
# node_modules dirs in active (non-HOLD) repos to never touch.
# NOTE(review): like PROTECTED_PATHS this list is declarative only — nothing
# in the visible script reads it (hence the SC2034 suppression). Marked
# readonly so it cannot be altered at runtime.
# shellcheck disable=SC2034
readonly -a ACTIVE_NODE_MODULES=(
  "/opt/bytelyst/learning_ai_common_plat/node_modules"
  "/opt/bytelyst/learning_ai_flowmonk/node_modules"
  "/opt/bytelyst/learning_ai_clock/node_modules"
  "/opt/bytelyst/learning_ai_notes/node_modules"
  "/opt/bytelyst/learning_ai_devops_tools/dashboard/node_modules"
  "/opt/bytelyst/learning_ai_invt_trdg/node_modules"
)
|
|
|
|
# ── Colour codes ─────────────────────────────────────────────────────────────
#######################################
# Set the ANSI colour globals used by all output helpers.
# Colours are enabled only when stdout is a terminal and NO_COLOR is unset or
# empty; otherwise every variable is set to the empty string so that escape
# sequences do not end up in pipes (e.g. `| logger` under cron) or files.
# Globals:   RED YELLOW GREEN CYAN BOLD DIM NC (written), NO_COLOR (read)
# Arguments: none
# Returns:   0
#######################################
init_colours() {
  if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
    RED=$'\033[0;31m'
    YELLOW=$'\033[1;33m'
    GREEN=$'\033[0;32m'
    CYAN=$'\033[0;36m'
    BOLD=$'\033[1m'
    DIM=$'\033[2m'
    NC=$'\033[0m'
  else
    RED=''
    YELLOW=''
    GREEN=''
    CYAN=''
    BOLD=''
    DIM=''
    NC=''
  fi
}
init_colours
|
|
|
|
# ── Flags ────────────────────────────────────────────────────────────────────
# Defaults; parse_args flips them to "true". They are used directly as
# commands (`if $DRY_RUN; then …`), so they must stay exactly true/false.
FULL_MODE=false
DRY_RUN=false
INSTALL_CRON=false
UNINSTALL_CRON=false
QUIET=false

#######################################
# Translate command-line options into the flag globals.
# An unrecognised option is fatal: silently ignoring a typo such as
# `--dryrun` would otherwise start a real, destructive run.
# Globals:   FULL_MODE DRY_RUN INSTALL_CRON UNINSTALL_CRON QUIET (written)
# Arguments: the script's command-line arguments
# Outputs:   usage message on stderr for an unknown option
# Returns:   0 on success; exits 2 on an unknown option
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --full) FULL_MODE=true ;;
      --dry-run) DRY_RUN=true ;;
      --install-cron) INSTALL_CRON=true ;;
      --uninstall-cron) UNINSTALL_CRON=true ;;
      --quiet) QUIET=true ;;
      *)
        printf 'ERROR: unknown option: %s\n' "$arg" >&2
        printf 'Usage: %s [--full] [--dry-run] [--quiet] [--install-cron | --uninstall-cron]\n' \
          "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
#######################################
# Append a UTC-timestamped line to $LOG_FILE and, unless $QUIET is true, echo
# the bare message to stdout (via `echo -e`, so backslash escapes expand).
# Globals:   LOG_FILE (read), QUIET (read)
# Arguments: message words; joined with single spaces
# Outputs:   the message on stdout when not quiet
# Returns:   0 always — an unwritable log file is deliberately ignored
#######################################
log() {
  local stamp
  stamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  # The group's stderr is silenced *before* the log file is opened; with
  # `>> file 2>/dev/null` the shell reports a failed open on the still
  # unredirected stderr, defeating the intent of best-effort logging.
  { printf '%s\n' "[$stamp] $*" >> "$LOG_FILE"; } 2>/dev/null || true
  if ! $QUIET; then
    echo -e "$*"
  fi
}
|
|
|
|
#######################################
# Announce the start of a cleanup section: a coloured rule on stdout (unless
# $QUIET is true) and a timestamped `=== title ===` line in $LOG_FILE.
# Globals:   QUIET, LOG_FILE, BOLD, CYAN, NC (read)
# Arguments: $1 - section title
# Returns:   0 always — an unwritable log file is deliberately ignored
#######################################
log_header() {
  local title="$1"
  if ! $QUIET; then
    echo -e "\n${BOLD}${CYAN}── $title ──────────────────────────────────────${NC}"
  fi
  # stderr is silenced around the whole group so that a failed open of the
  # log file stays quiet (a trailing `2>/dev/null` would be applied too late).
  {
    printf '%s\n' "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] === $title ===" >> "$LOG_FILE"
  } 2>/dev/null || true
}
|
|
|
|
#######################################
# Print one labelled step line to stdout (nothing when $QUIET is true).
# Known labels get a colour; any other label is still printed, uncoloured,
# instead of being dropped silently.
# Globals:   QUIET, GREEN, YELLOW, DIM, CYAN, NC (read)
# Arguments: $1 - label: SAFE | CAREFUL | SKIP | DRY (shown as DRY-RUN)
#            $2 - message
# Returns:   0
#######################################
log_step() {
  local label="$1" msg="$2"
  if $QUIET; then
    return 0
  fi
  case "$label" in
    SAFE) echo -e " ${GREEN}[SAFE]${NC} $msg" ;;
    CAREFUL) echo -e " ${YELLOW}[CAREFUL]${NC} $msg" ;;
    SKIP) echo -e " ${DIM}[SKIP]${NC} $msg" ;;
    DRY) echo -e " ${CYAN}[DRY-RUN]${NC} $msg" ;;
    *) echo -e " [${label}] $msg" ;;
  esac
}
|
|
|
|
#######################################
# Announce and execute one cleanup command.
# Usage: run_cmd LABEL "description" cmd [args...]
# In dry-run mode the command is only printed. Otherwise its stdout/stderr are
# appended to $LOG_FILE. Execution is best-effort — a failing command never
# aborts the run — but its exit status is now recorded instead of discarded.
# Globals:   DRY_RUN, LOG_FILE (read)
# Arguments: $1 - step label passed to log_step
#            $2 - human-readable description
#            $3… - command and its arguments
# Returns:   0 always
#######################################
run_cmd() {
  local label="$1" desc="$2"
  shift 2
  log_step "$label" "$desc"
  if $DRY_RUN; then
    log_step DRY "would run: $*"
    return 0
  fi
  log "[CMD] $*"
  local rc=0
  "$@" >> "$LOG_FILE" 2>&1 || rc=$?
  if (( rc != 0 )); then
    log "[WARN] command exited with status $rc: $*"
  fi
  return 0
}
|
|
|
|
# Usage of / captured at the start of the run by record_disk_before and
# compared against the end state by report_disk_delta.
disk_before=""

#######################################
# Print "used avail pcent" for the root filesystem on one line.
# Takes the last line of the df output (the data row) and rebuilds it with
# awk so fields are single-space separated with no leading/trailing blanks.
# Outputs: e.g. "10G 20G 34%"
#######################################
_root_disk_usage() {
  df -h / --output=used,avail,pcent \
    | tail -n 1 \
    | awk '{ $1 = $1; print }'
}

#######################################
# Snapshot root filesystem usage into the global disk_before.
# Globals: disk_before (written)
#######################################
record_disk_before() {
  disk_before=$(_root_disk_usage)
}

#######################################
# Show root filesystem usage before and after the run and log both values.
# Globals:   disk_before, QUIET, DIM, GREEN, NC (read)
# Outputs:   two lines on stdout unless $QUIET is true; one [DISK] line via log
#######################################
report_disk_delta() {
  local disk_after
  disk_after=$(_root_disk_usage)
  if ! $QUIET; then
    echo -e "\n ${DIM}Before: $disk_before${NC}"
    echo -e " ${GREEN}After: $disk_after${NC}"
  fi
  log "[DISK] before=$disk_before after=$disk_after"
}
|
|
|
|
# ── Safety guard ─────────────────────────────────────────────────────────────
|
|
#######################################
# Abort unless the effective user is root.
# Globals:   RED, NC (read)
# Outputs:   error message on stderr when not root
# Returns:   0 when running as root; otherwise exits the script with 1
#######################################
require_root() {
  if [[ "$(id -u)" -eq 0 ]]; then
    return 0
  fi
  echo -e "${RED}ERROR: This script must be run as root (use sudo)${NC}" >&2
  exit 1
}
|
|
|
|
# ── Cron install/uninstall ───────────────────────────────────────────────────
|
|
#######################################
# Install (or refresh) the managed cron schedule in the current user's crontab.
#
# Every line this function writes carries the marker "# bytelyst-vm-maintenance"
# (comments start with it, job lines end with it as a shell comment), so the
# whole block can be removed again by filtering on the marker. Before writing,
# all previously managed lines are stripped — by marker, by script path, and by
# the descriptive comments older versions wrote — which makes the operation
# idempotent instead of appending a duplicate set of jobs on every call.
#
# Schedule written:
#   07:00 daily    health check script with --quiet --notify
#   03:00 daily    `docker builder prune -f` only (build cache, never images)
#   02:00 Sunday   this script, standard mode
#   01:00 1st      this script, --full
#
# Globals:   SCRIPT_PATH, HEALTH_CHECK, LOG_FILE, BOLD, GREEN, CYAN, RED, NC (read)
# Outputs:   progress and the resulting schedule on stdout
# Returns:   0 on success, 1 if the new crontab could not be installed
#######################################
do_install_cron() {
  # An empty value would become an empty grep pattern below, which matches —
  # and therefore deletes — every line of the existing crontab.
  : "${SCRIPT_PATH:?SCRIPT_PATH must be set}" "${HEALTH_CHECK:?HEALTH_CHECK must be set}"

  echo -e "\n${BOLD}Installing cron schedule…${NC}\n"

  local cron_tag="# bytelyst-vm-maintenance"
  local tmp_cron
  tmp_cron=$(mktemp)

  # Export the existing crontab minus everything we manage. `|| true` covers
  # both "no crontab yet" and "nothing left after filtering".
  crontab -l 2>/dev/null \
    | grep -vF \
        -e "$cron_tag" \
        -e "$SCRIPT_PATH" \
        -e "$HEALTH_CHECK" \
        -e "# Daily health check at 07:00 UTC" \
        -e "# Daily build cache prune at 03:00 UTC" \
        -e "# Weekly cleanup (Sunday 02:00 UTC)" \
        -e "# Monthly full cleanup (1st of month 01:00 UTC)" \
    > "$tmp_cron" || true

  cat >> "$tmp_cron" <<EOF
$cron_tag — DO NOT EDIT this block manually, use --install-cron / --uninstall-cron
$cron_tag: daily health check at 07:00 UTC (read-only, sends Telegram alert on WARNING/CRITICAL)
0 7 * * * bash "$HEALTH_CHECK" --quiet --notify 2>&1 | logger -t vm-health-check $cron_tag
$cron_tag: daily build cache prune at 03:00 UTC (always safe, never removes images)
0 3 * * * docker builder prune -f 2>&1 | logger -t vm-cleanup $cron_tag
$cron_tag: weekly cleanup (Sunday 02:00 UTC) — logs, apt, npm, .next/cache, build cache
0 2 * * 0 bash "$SCRIPT_PATH" --quiet 2>&1 | logger -t vm-cleanup $cron_tag
$cron_tag: monthly full cleanup (1st of month 01:00 UTC) — adds pnpm store, docker system prune
0 1 1 * * bash "$SCRIPT_PATH" --full --quiet 2>&1 | logger -t vm-cleanup $cron_tag
EOF

  # Remove the temp file on the failure path too.
  if ! crontab "$tmp_cron"; then
    rm -f "$tmp_cron"
    echo -e "${RED}ERROR: failed to install crontab${NC}" >&2
    return 1
  fi
  rm -f "$tmp_cron"

  echo -e " ${GREEN}✓ Cron jobs installed. Current schedule:${NC}"
  echo ""
  crontab -l | grep -F "$cron_tag" || true
  echo ""
  echo -e " View logs: ${CYAN}tail -f $LOG_FILE${NC}"
  echo -e " View cron: ${CYAN}crontab -l${NC}"
  echo -e " Remove: ${CYAN}bash $SCRIPT_PATH --uninstall-cron${NC}"
}
|
|
|
|
#######################################
# Remove the managed cron schedule from the current user's crontab.
#
# Drops every line that carries the "# bytelyst-vm-maintenance" marker, every
# line that mentions vm-health-check.sh or vm-cleanup.sh, and the four
# descriptive comment lines that --install-cron writes next to the jobs
# (these contain neither the marker nor a script name and would otherwise be
# left behind as orphans). All other crontab content is preserved.
#
# Globals:   GREEN, RED, NC (read)
# Outputs:   confirmation on stdout, error on stderr
# Returns:   0 on success, 1 if the filtered crontab could not be installed
#######################################
do_uninstall_cron() {
  local cron_tag="# bytelyst-vm-maintenance"
  local tmp_cron
  tmp_cron=$(mktemp)

  # Single fixed-string filter; `|| true` covers "no crontab" and "nothing
  # left after filtering" (grep exits 1 when it prints no lines).
  crontab -l 2>/dev/null \
    | grep -vF \
        -e "$cron_tag" \
        -e "vm-health-check.sh" \
        -e "vm-cleanup.sh" \
        -e "# Daily health check at 07:00 UTC" \
        -e "# Daily build cache prune at 03:00 UTC" \
        -e "# Weekly cleanup (Sunday 02:00 UTC)" \
        -e "# Monthly full cleanup (1st of month 01:00 UTC)" \
    > "$tmp_cron" || true

  # Remove the temp file on the failure path too.
  if ! crontab "$tmp_cron"; then
    rm -f "$tmp_cron"
    echo -e "${RED}ERROR: failed to update crontab${NC}" >&2
    return 1
  fi
  rm -f "$tmp_cron"
  echo -e " ${GREEN}✓ Cron jobs removed${NC}"
}
|
|
|
|
# ── Cleanup steps ─────────────────────────────────────────────────────────────
|
|
|
|
#######################################
# Prune the Docker build cache (skipped when the Docker daemon is unreachable).
# The current cache size is shown in the step description. It is read from
# the Size field of `docker system df`; the default table cannot be parsed
# with awk field 3 because "Build Cache" is two words, which makes field 3
# the TOTAL count rather than the size.
# Globals:   none directly (uses log_header, log_step, run_cmd)
# Returns:   0
#######################################
step_docker_build_cache() {
  log_header "Docker Build Cache"
  if ! docker info &>/dev/null; then
    log_step SKIP "Docker not running — skipping build cache prune"
    return
  fi

  local cache_size
  # A literal tab separates type and size so the two-word type stays intact.
  cache_size=$(
    docker system df --format $'{{.Type}}\t{{.Size}}' 2>/dev/null \
      | awk -F'\t' '$1 == "Build Cache" { print $2; exit }'
  ) || cache_size=""

  run_cmd SAFE "Prune Docker build cache (currently ${cache_size:-?})" \
    docker builder prune -f
}
|
|
|
|
#######################################
# Run `docker system prune -f` (skipped when the Docker daemon is unreachable).
# Without -a, this removes stopped containers, unused networks and dangling
# images only; images used by an existing container are kept.
# Returns:   0
#######################################
step_docker_system_prune() {
  log_header "Docker System Prune (dangling only)"

  docker info > /dev/null 2>&1 || {
    log_step SKIP "Docker not running"
    return
  }

  local -a prune_cmd=(docker system prune -f)
  run_cmd SAFE "Remove stopped containers, unused networks, dangling images" \
    "${prune_cmd[@]}"
}
|
|
|
|
#######################################
# Report containers whose restart count suggests a crash loop. Read-only:
# it only prints remediation hints and logs a warning.
#
# Restart counts come from `docker inspect`; `docker ps --format` has no
# RestartCount field, so asking `ps` for it fails and (with errors silenced)
# makes the check report "no crash loops" unconditionally.
#
# Globals:   RED, BOLD, DIM, NC (read)
# Arguments: $1 - optional restart threshold (default 20)
# Outputs:   warning block on stdout when loops are found (not gated by QUIET)
# Returns:   0
#######################################
step_docker_crash_loop_check() {
  local threshold="${1:-20}"

  log_header "Crash Loop Check"
  if ! docker info &>/dev/null; then return; fi

  local -a ids=()
  mapfile -t ids < <(docker ps -aq 2>/dev/null || true)

  local -a looping=()
  local name restarts
  if (( ${#ids[@]} > 0 )); then
    while IFS=$'\t' read -r name restarts; do
      name="${name#/}" # inspect reports names with a leading slash
      [[ -n "$name" ]] || continue
      # Skip anything that is not a plain number so (( )) cannot error out.
      [[ "$restarts" =~ ^[0-9]+$ ]] || continue
      if (( restarts >= threshold )); then
        looping+=("$name(${restarts}x)")
      fi
    done < <(
      docker inspect --format $'{{.Name}}\t{{.RestartCount}}' "${ids[@]}" \
        2>/dev/null || true
    )
  fi

  if (( ${#looping[@]} > 0 )); then
    echo -e " ${RED}${BOLD}⚠ CRASH LOOPS DETECTED — manual fix required:${NC}"
    local c
    for c in "${looping[@]}"; do
      echo -e " ${RED}→${NC} $c"
      # ${c%%(*)} strips the "(Nx)" suffix to recover the container name.
      echo -e " ${DIM}fix: docker logs ${c%%(*)} | tail -20${NC}"
      echo -e " ${DIM}stop loop: docker update --restart=no ${c%%(*)}${NC}"
    done
    log "[WARN] crash-looping containers: ${looping[*]}"
  else
    log_step SAFE "No crash-looping containers"
  fi
}
|
|
|
|
#######################################
# Shrink the systemd journal: first cap it at 200MB, then drop entries older
# than 7 days. Both vacuum operations go through run_cmd.
# Returns:   0
#######################################
step_journal() {
  log_header "Journal Logs"

  # Parallel arrays: description i belongs to vacuum option i.
  local -a descriptions=(
    "Vacuum journal to 200MB"
    "Vacuum journal older than 7 days"
  )
  local -a vacuum_opts=(
    --vacuum-size=200M
    --vacuum-time=7d
  )

  local i
  for i in "${!vacuum_opts[@]}"; do
    run_cmd SAFE "${descriptions[i]}" journalctl "${vacuum_opts[i]}"
  done
}
|
|
|
|
#######################################
# Empty the apt package cache with `apt-get clean`, via run_cmd.
# Returns:   0
#######################################
step_apt_cache() {
  local -a clean_cmd=(apt-get clean)

  log_header "APT Cache"
  run_cmd SAFE "Clean apt package cache" "${clean_cmd[@]}"
}
|
|
|
|
#######################################
# Clear the npm cache when npm is installed; otherwise do nothing beyond
# printing the section header.
# Returns:   0
#######################################
step_npm_cache() {
  log_header "NPM Cache"

  # Nothing to clean on hosts without npm.
  if ! command -v npm > /dev/null 2>&1; then
    return 0
  fi

  run_cmd SAFE "Clean npm cache" npm cache clean --force
}
|
|
|
|
#######################################
# Delete Next.js build caches (`.next/cache`) below a project root.
# Only the `cache` subdirectory is removed; the rest of `.next` (for example
# `.next/standalone`, the production build) is left untouched.
#
# Fixes over the previous version:
#   - the counter uses an assignment instead of `(( count++ ))`, whose exit
#     status is 1 when the old value is 0 and therefore aborted the script
#     under `set -e` right after the first directory;
#   - paths are read NUL-delimited, so unusual names survive intact;
#   - find does not descend into a matched `.next` while it is being emptied.
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - optional root to scan (default /opt/bytelyst)
# Returns:   0
#######################################
step_next_cache() {
  local root="${1:-/opt/bytelyst}"

  log_header ".next/cache Directories"

  local count=0 next_dir cache_dir
  while IFS= read -r -d '' next_dir; do
    cache_dir="$next_dir/cache"
    [[ -d "$cache_dir" ]] || continue
    log_step SAFE "Remove $cache_dir"
    if ! $DRY_RUN; then
      rm -rf -- "$cache_dir"
    fi
    count=$(( count + 1 ))
  done < <(
    find "$root" -maxdepth 7 -type d -name ".next" -prune -print0 2>/dev/null
  )

  if (( count == 0 )); then
    log_step SKIP "No .next/cache dirs found"
  fi
}
|
|
|
|
#######################################
# Prune unreferenced packages from the pnpm store, or log a SKIP when pnpm
# is not installed.
# Returns:   0
#######################################
step_pnpm_store() {
  log_header "PNPM Store"

  if ! command -v pnpm > /dev/null 2>&1; then
    log_step SKIP "pnpm not found"
    return
  fi

  run_cmd SAFE "Prune unreferenced packages from pnpm store" \
    pnpm store prune
}
|
|
|
|
#######################################
# Tidy rotated logs:
#   1. gzip the `.1` rotations of syslog, kern.log and ufw.log when they exist
#      uncompressed and no `.gz` sibling exists yet;
#   2. delete `*.gz` files older than 30 days anywhere below the log directory.
#
# Fixes over the previous version:
#   - the counter uses an assignment instead of `(( count++ ))`, whose exit
#     status is 1 when the old value is 0 and aborted the script under
#     `set -e` after the first compression;
#   - loop variables are local, and find results are read NUL-delimited.
#
# Arguments: $1 - optional log directory (default /var/log)
# Returns:   0
#######################################
step_old_logs() {
  local log_dir="${1:-/var/log}"

  log_header "Old Log Files"

  # Compress any uncompressed .1 rotations that logrotate missed
  local count=0 f
  for f in "$log_dir/syslog.1" "$log_dir/kern.log.1" "$log_dir/ufw.log.1"; do
    if [[ -f "$f" && ! -f "${f}.gz" ]]; then
      run_cmd SAFE "Compress $f" gzip -9 "$f"
      count=$(( count + 1 ))
    fi
  done

  # Remove log rotations older than 30 days
  local old_log
  while IFS= read -r -d '' old_log; do
    run_cmd CAREFUL "Remove old log: $old_log" rm -f -- "$old_log"
  done < <(
    find "$log_dir" -type f -name "*.gz" -mtime +30 -print0 2>/dev/null
  )

  if (( count == 0 )); then
    log_step SKIP "No uncompressed rotations to compress"
  fi
}
|
|
|
|
#######################################
# Reclaim space from archived projects under the HOLD directory by deleting
# their `node_modules` trees and `.next` build output. Source code is left in
# place, so both can be regenerated by reinstalling/rebuilding.
#
# Fixes over the previous version:
#   - counters use assignments instead of `(( found++ ))`, whose exit status
#     is 1 when the old value is 0 and aborted the script under `set -e`
#     after the first deletion;
#   - find prunes at each match, so nested node_modules inside an already
#     listed tree are no longer reported after their parent has been removed;
#   - a non-numeric `du` result falls back to 0 rather than breaking the sum;
#   - dry-run logs "Would free" instead of claiming space was freed.
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - optional HOLD root (default /opt/bytelyst/HOLD)
# Returns:   0
#######################################
step_hold_cleanup() {
  local hold_root="${1:-/opt/bytelyst/HOLD}"

  log_header "HOLD Archived Projects"

  # node_modules in HOLD are safe to delete — code stays, can be reinstalled
  local total_freed=0 found=0 nm size
  while IFS= read -r -d '' nm; do
    size=$(du -sm -- "$nm" 2>/dev/null | cut -f1) || size=0
    [[ "$size" =~ ^[0-9]+$ ]] || size=0
    run_cmd CAREFUL "Delete archived node_modules: $nm (~${size}MB)" \
      rm -rf -- "$nm"
    total_freed=$(( total_freed + size ))
    found=$(( found + 1 ))
  done < <(
    find "$hold_root" -maxdepth 4 -type d -name "node_modules" -prune -print0 \
      2>/dev/null
  )

  if (( found == 0 )); then
    log_step SKIP "HOLD node_modules already clean"
  elif $DRY_RUN; then
    log "[INFO] Would free ~${total_freed}MB from HOLD node_modules"
  else
    log "[INFO] Freed ~${total_freed}MB from HOLD node_modules"
  fi

  # .next build artifacts in HOLD
  local next_dir
  while IFS= read -r -d '' next_dir; do
    run_cmd CAREFUL "Delete archived .next: $next_dir" rm -rf -- "$next_dir"
  done < <(
    find "$hold_root" -maxdepth 6 -type d -name ".next" -prune -print0 \
      2>/dev/null
  )
}
|
|
|
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
|
|
|
# Every mode — including cron install/uninstall — requires root, so the
# privilege check runs once up front before anything else.
require_root

# Special modes: manage the cron schedule and stop; no cleanup is performed.
if $INSTALL_CRON; then
  do_install_cron
  exit 0
fi
if $UNINSTALL_CRON; then
  do_uninstall_cron
  exit 0
fi

# Banner (suppressed by --quiet)
if ! $QUIET; then
  MODE="STANDARD"
  if $FULL_MODE; then MODE="FULL"; fi
  DRY=""
  if $DRY_RUN; then DRY=" (DRY-RUN)"; fi
  echo -e "\n${BOLD}VM Cleanup — $(hostname) — ${MODE}${DRY}${NC}"
  echo -e "${DIM}$(date -u '+%Y-%m-%d %H:%M UTC')${NC}"
fi

_mode="standard"
if $FULL_MODE; then _mode="full"; fi
log "[START] mode=${_mode} dry=$DRY_RUN"
record_disk_before

# ── WEEKLY (always run) ──────────────────────────────────────────────────────
step_docker_build_cache
step_docker_crash_loop_check
step_journal
step_apt_cache
step_npm_cache
step_next_cache

# ── MONTHLY (only with --full) ───────────────────────────────────────────────
if $FULL_MODE; then
  step_docker_system_prune
  step_pnpm_store
  step_old_logs
  step_hold_cleanup
fi

# ── Final report ─────────────────────────────────────────────────────────────
report_disk_delta

if ! $QUIET; then
  echo -e "\n${GREEN}${BOLD}✓ Cleanup complete${NC}"
  echo -e " Log: ${CYAN}tail -50 $LOG_FILE${NC}"

  # Follow up with the sibling health check when it is present. Its exit
  # status is ignored so that it cannot fail the cleanup run.
  if [[ -f "$HEALTH_CHECK" ]]; then
    echo ""
    echo -e "${DIM}Running health check…${NC}"
    bash "$HEALTH_CHECK" || true
  fi
fi

log "[END] cleanup complete"
|