fix: harden operator action lease release + idempotent terminal actions
- release lease with fenced epoch (leaseEpoch+1, clear holder) so a stale renewal cannot resurrect a held lease after operator displacement
- reject on dead_letter / cancel on failed are now idempotent no-ops (no epoch bump, no duplicate event)
- add coordinator test for terminal idempotency

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
283383561c
commit
c8ab43d3ae
@ -820,4 +820,21 @@ describe('fleet coordinator — Phase 3 per-product budgets', () => {
|
|||||||
expect(missing.ok).toBe(false);
|
expect(missing.ok).toBe(false);
|
||||||
if (!missing.ok) expect(missing.reason).toBe('not_found');
|
if (!missing.ok) expect(missing.reason).toBe('not_found');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('operator reject/cancel are idempotent on already-terminal jobs (no epoch bump)', async () => {
|
||||||
|
const { job } = await coord.submitJob(PID, input());
|
||||||
|
await coord.claimNextJob(factory());
|
||||||
|
await coord.operatorAction(job.id, PID, 'reject');
|
||||||
|
const afterFirst = await repo.getJob(job.id, PID);
|
||||||
|
const epochAfterReject = afterFirst!.leaseEpoch;
|
||||||
|
|
||||||
|
// re-reject is a no-op: stage stays dead_letter, epoch unchanged, no new event
|
||||||
|
const again = await coord.operatorAction(job.id, PID, 'reject');
|
||||||
|
expect(again.ok).toBe(true);
|
||||||
|
const afterSecond = await repo.getJob(job.id, PID);
|
||||||
|
expect(afterSecond?.stage).toBe('dead_letter');
|
||||||
|
expect(afterSecond?.leaseEpoch).toBe(epochAfterReject);
|
||||||
|
const rejectEvents = (await repo.listEvents(job.id)).filter(e => e.type === 'operator_action');
|
||||||
|
expect(rejectEvents).toHaveLength(1);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -859,6 +859,11 @@ export async function operatorAction(
|
|||||||
if (!job) return { ok: false, reason: 'not_found' };
|
if (!job) return { ok: false, reason: 'not_found' };
|
||||||
if (job.stage === 'shipped') return { ok: false, reason: 'invalid_state' };
|
if (job.stage === 'shipped') return { ok: false, reason: 'invalid_state' };
|
||||||
|
|
||||||
|
// Idempotent terminal actions: re-rejecting a dead_letter / re-cancelling a
|
||||||
|
// failed job is a no-op (don't bump the epoch or append a duplicate event).
|
||||||
|
if (action === 'reject' && job.stage === 'dead_letter') return { ok: true, doc: job };
|
||||||
|
if (action === 'cancel' && job.stage === 'failed') return { ok: true, doc: job };
|
||||||
|
|
||||||
const newEpoch = job.leaseEpoch + 1; // fence any current holder
|
const newEpoch = job.leaseEpoch + 1; // fence any current holder
|
||||||
|
|
||||||
let stage: FleetStage;
|
let stage: FleetStage;
|
||||||
@ -882,10 +887,15 @@ export async function operatorAction(
|
|||||||
return { ok: false, reason: res.reason === 'not_found' ? 'not_found' : 'conflict' };
|
return { ok: false, reason: res.reason === 'not_found' ? 'not_found' : 'conflict' };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free the seat: release any still-held lease (best-effort, fenced by newEpoch).
|
// Free the seat: release any still-held lease, fencing it with the new epoch so
|
||||||
|
// a stale renewal cannot resurrect it (mirrors the reaper, §25.3).
|
||||||
const lease = await repo.getLease(jobId);
|
const lease = await repo.getLease(jobId);
|
||||||
if (lease && lease.status === 'held') {
|
if (lease && lease.status === 'held') {
|
||||||
await repo.revUpdateLease(jobId, lease.rev, { status: 'released' });
|
await repo.revUpdateLease(jobId, lease.rev, {
|
||||||
|
status: 'released',
|
||||||
|
leaseEpoch: newEpoch,
|
||||||
|
holderFactoryId: undefined,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
await repo.appendEvent({
|
await repo.appendEvent({
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user