fix(agent-queue): verify pid start time to defeat pid reuse
Record pidstart (the ps lstart value) at launch and verify it in all liveness checks (_meta_active, status, stop) via _pid_alive, so a recycled pid can never be mistaken for our worker. Falls back to a plain liveness check when no start time was recorded.
This commit is contained in:
parent
a849a30e11
commit
4239648876
@ -112,6 +112,21 @@ _mtime() {
|
|||||||
stat -f %m "$1" 2>/dev/null || stat -c %Y "$1" 2>/dev/null || echo ""
|
stat -f %m "$1" 2>/dev/null || stat -c %Y "$1" 2>/dev/null || echo ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# _pidstart <pid> -> the process start time as reported by ps (whitespace-normalized).
# Used as an identity token so a recycled pid is never mistaken for our worker.
#   $1  pid to look up.
# Prints nothing (and still returns 0) when <pid> is empty, is not running, or
# ps cannot report lstart.
# LC_ALL=C pins how the date is rendered: the token is recorded by one invocation
# and compared by later ones, and a ps that formats lstart through the current
# locale would otherwise yield a different string for the same process.
_pidstart() {
  [[ -n "${1:-}" ]] || return 0
  # awk's $1=$1 rebuilds the record, collapsing runs of blanks and trimming the ends.
  LC_ALL=C ps -o lstart= -p "$1" 2>/dev/null | awk '{$1=$1; print}'
}
|
||||||
|
|
||||||
|
# _pid_alive <pid> <pidstart> -> 0 if the pid is live AND (when a start time was
# recorded) its current start time still matches — defeating pid reuse.
#   $1  pid to probe; must be a positive decimal integer.
#   $2  start-time token recorded at launch; empty means none was recorded, in
#       which case only plain liveness is checked.
# Returns 1 for an empty, non-numeric, zero or negative pid. `kill` treats 0 and
# negative values as process-group / broadcast targets, so a corrupt value must
# never be reported as "alive" (callers go on to signal the pid).
_pid_alive() {
  local pid=${1:-} want=${2:-} cur
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1
  kill -0 "$pid" 2>/dev/null || return 1
  # No recorded start time: fall back to the plain liveness result above.
  [[ -z "$want" ]] && return 0
  cur=$(_pidstart "$pid")
  [[ "$cur" == "$want" ]]
}
|
||||||
|
|
||||||
# _dur_to_secs <dur> -> seconds. Accepts 90, 90s, 45m, 2h, 1d. Invalid/empty -> 0.
|
# _dur_to_secs <dur> -> seconds. Accepts 90, 90s, 45m, 2h, 1d. Invalid/empty -> 0.
|
||||||
_dur_to_secs() {
|
_dur_to_secs() {
|
||||||
local d=$1
|
local d=$1
|
||||||
@ -139,7 +154,8 @@ _meta_active() {
|
|||||||
grep -q '^ended=' "$f" && return 1
|
grep -q '^ended=' "$f" && return 1
|
||||||
pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
|
pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
|
||||||
if [[ -n "$pid" ]]; then
|
if [[ -n "$pid" ]]; then
|
||||||
kill -0 "$pid" 2>/dev/null
|
local pidstart; pidstart=$(grep '^pidstart=' "$f" | head -1 | cut -d= -f2-)
|
||||||
|
_pid_alive "$pid" "$pidstart"
|
||||||
return $?
|
return $?
|
||||||
fi
|
fi
|
||||||
mt=$(_mtime "$f"); age=$(( $(date +%s) - ${mt:-0} ))
|
mt=$(_mtime "$f"); age=$(( $(date +%s) - ${mt:-0} ))
|
||||||
@ -386,7 +402,7 @@ cmd_run() {
|
|||||||
echo "started=$(date +%s)"
|
echo "started=$(date +%s)"
|
||||||
} > "$STATE/$job.meta"
|
} > "$STATE/$job.meta"
|
||||||
run_worker "$doing_file" &
|
run_worker "$doing_file" &
|
||||||
echo "pid=$!" >> "$STATE/$job.meta"
|
{ echo "pid=$!"; echo "pidstart=$(_pidstart "$!")"; } >> "$STATE/$job.meta"
|
||||||
log "▶ launching $C_BOLD$job$C_RESET (engine=$w_eng, lock=$w_key)"
|
log "▶ launching $C_BOLD$job$C_RESET (engine=$w_eng, lock=$w_key)"
|
||||||
sleep 1
|
sleep 1
|
||||||
running=$(active_workers)
|
running=$(active_workers)
|
||||||
@ -420,8 +436,8 @@ cmd_status() {
|
|||||||
for f in "$STATE"/*.meta; do
|
for f in "$STATE"/*.meta; do
|
||||||
[[ -e "$f" ]] || continue
|
[[ -e "$f" ]] || continue
|
||||||
grep -q '^ended=' "$f" && continue
|
grep -q '^ended=' "$f" && continue
|
||||||
local pid; pid=$(grep '^pid=' "$f" | cut -d= -f2)
|
local pid pidstart; pid=$(grep '^pid=' "$f" | cut -d= -f2); pidstart=$(grep '^pidstart=' "$f" | cut -d= -f2-)
|
||||||
[[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null || continue
|
_pid_alive "$pid" "$pidstart" || continue
|
||||||
if ! $printed; then printf ' %sRUNNING%s\n' "$C_BOLD" "$C_RESET"; printed=true; fi
|
if ! $printed; then printf ' %sRUNNING%s\n' "$C_BOLD" "$C_RESET"; printed=true; fi
|
||||||
local job eng start now el last lmt age stall=""
|
local job eng start now el last lmt age stall=""
|
||||||
job=$(grep '^job=' "$f" | cut -d= -f2)
|
job=$(grep '^job=' "$f" | cut -d= -f2)
|
||||||
@ -450,12 +466,12 @@ cmd_dash() {
|
|||||||
|
|
||||||
cmd_stop() {
|
cmd_stop() {
|
||||||
ensure_dirs
|
ensure_dirs
|
||||||
local killed=0 f pid
|
local killed=0 f pid pidstart
|
||||||
for f in "$STATE"/*.meta; do
|
for f in "$STATE"/*.meta; do
|
||||||
[[ -e "$f" ]] || continue
|
[[ -e "$f" ]] || continue
|
||||||
grep -q '^ended=' "$f" && continue
|
grep -q '^ended=' "$f" && continue
|
||||||
pid=$(grep '^pid=' "$f" | cut -d= -f2)
|
pid=$(grep '^pid=' "$f" | cut -d= -f2); pidstart=$(grep '^pidstart=' "$f" | cut -d= -f2-)
|
||||||
[[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null && { kill "$pid" 2>/dev/null && killed=$((killed+1)); }
|
_pid_alive "$pid" "$pidstart" && { kill "$pid" 2>/dev/null && killed=$((killed+1)); }
|
||||||
done
|
done
|
||||||
[[ -f "$STATE/daemon.pid" ]] && kill "$(cat "$STATE/daemon.pid")" 2>/dev/null
|
[[ -f "$STATE/daemon.pid" ]] && kill "$(cat "$STATE/daemon.pid")" 2>/dev/null
|
||||||
rm -f "$STATE/daemon.pid"
|
rm -f "$STATE/daemon.pid"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user