Files
stefano 33c898ff9a fix: use companion watcher process for async job completion
The async startJob previously relied on Node.js event listeners in the
CLI process to capture child output and finalize the job file. But the
CLI process exits immediately after returning the job ID, killing the
event loop before the close handler fires — leaving jobs stuck at
'running' forever.

Fix: startJob now spawns a companion watcher process (job-watcher.ts)
that is itself detached and outlives the CLI. The watcher:
- Spawns the actual client (codex/claude/opencode)
- Captures stdout/stderr
- Writes the final job record to disk on child exit
- Has its own 10-minute timeout safety net

The CLI returns the job ID immediately. The watcher independently
finalizes the job. The CLI no longer needs to stay alive.

Also updates tests to mock the watcher spawn via injectable
spawnWatcher option.
2026-05-20 14:08:44 -05:00

739 lines
22 KiB
TypeScript

import { describe, it } from "node:test";
import assert from "node:assert";
import { EventEmitter } from "node:events";
import { Readable } from "node:stream";
import {
startJob,
getJob,
getJobResult,
cancelJob,
listJobs,
cleanupJobs,
} from "../src/jobs.js";
import { JobNotFoundError, JobResultUnavailableError } from "../src/types.js";
import type { ClientName, JobRecord, JobStatus } from "../src/types.js";
/**
 * Describes how a mocked child process should behave.
 * Every field is optional; an empty scenario exits with code 0 and no output.
 */
interface MockScenario {
  /** Text yielded by the fake stdout stream as a single chunk. */
  stdout?: string;

  /** Text yielded by the fake stderr stream as a single chunk. */
  stderr?: string;

  /** Code reported on `exit`/`close`; consumers fall back to 0 when omitted. */
  exitCode?: number;

  /** When set, emitted as an `error` event instead of a normal exit. */
  error?: NodeJS.ErrnoException;

  /** When true, the fake child never exits on its own. */
  hang?: boolean;
}
/**
 * Builds an EventEmitter-based stand-in for a spawned child process.
 *
 * Behavior, driven by `scenario`:
 * - `stdout` / `stderr` are readable streams yielding the scenario text as one
 *   chunk, or nothing when the corresponding field is undefined.
 * - After the streams have ended, `exit` then `close` are emitted with
 *   `scenario.exitCode ?? 0`, unless the scenario is a hang or an error.
 * - `scenario.error` is emitted as an `error` event on the next tick, so the
 *   caller must attach an `error` listener synchronously.
 * - `kill(signal)` sets `killed` and emits `exit`/`close` with a null code and
 *   the given signal on the next tick.
 *
 * `exit`/`close` are emitted at most once per child. Previously a scenario
 * without stdout and stderr emitted them twice (once per stream `end`), and a
 * `kill()` after a natural exit emitted them again.
 *
 * @param scenario - Output, exit code, and failure mode to simulate.
 * @returns A loosely typed fake exposing `pid`, `stdout`, `stderr`, `killed`,
 *   `kill`, and `unref`.
 */
function createMockChildProcess(scenario: MockScenario): any {
  const child = new EventEmitter() as any;
  child.pid = 12345;
  child.stdout = Readable.from(
    scenario.stdout !== undefined ? [scenario.stdout] : []
  );
  child.stderr = Readable.from(
    scenario.stderr !== undefined ? [scenario.stderr] : []
  );
  child.killed = false;
  child.unref = () => {};

  // Single exit path: whichever of natural exit or kill() happens first wins.
  let terminated = false;
  const terminate = (code: number | null, signal: string | null): void => {
    if (terminated) return;
    terminated = true;
    child.emit("exit", code, signal);
    child.emit("close", code, signal);
  };

  child.kill = (signal: string = "SIGTERM") => {
    child.killed = true;
    process.nextTick(() => terminate(null, signal));
    return true;
  };

  // A stream with no scenario text counts as already ended, so a consumer
  // reading only the other stream still observes the child closing.
  let stdoutEnded = scenario.stdout === undefined;
  let stderrEnded = scenario.stderr === undefined;
  const maybeClose = (): void => {
    if (stdoutEnded && stderrEnded && !scenario.hang && !scenario.error) {
      terminate(scenario.exitCode ?? 0, null);
    }
  };
  child.stdout.on("end", () => {
    stdoutEnded = true;
    maybeClose();
  });
  child.stderr.on("end", () => {
    stderrEnded = true;
    maybeClose();
  });

  // Deferred so the caller can attach its "error" listener first.
  process.nextTick(() => {
    if (scenario.error) {
      child.emit("error", scenario.error);
    }
  });
  return child;
}
/**
 * Creates an in-memory replacement for the subset of `fs` used by the job
 * functions under test.
 *
 * Files live in a path-to-contents map and directories in a set of paths
 * registered via `mkdirSync`. Paths are compared as plain strings with `/` as
 * the separator; there is no normalization and no implicit parent creation.
 *
 * `statSync` derives `mtimeMs` from the `startedAt` field of the stored JSON so
 * age-based tests are deterministic. It falls back to the current time when
 * the content is not JSON or has no parseable `startedAt` (previously the
 * latter produced `NaN`).
 *
 * @returns The mock fs object, plus `__files` / `__dirs` for direct inspection.
 */
function createMockFs() {
  const files = new Map<string, string>();
  const dirs = new Set<string>();

  // Mirrors the shape of a Node "no such file" error closely enough for
  // callers that branch on `err.code`.
  const enoent = (p: string) =>
    Object.assign(new Error(`ENOENT: ${p}`), { code: "ENOENT" });

  const fs = {
    mkdirSync: (p: string, _opts?: any) => {
      dirs.add(p);
    },
    writeFileSync: (p: string, data: string) => {
      files.set(p, data);
    },
    readFileSync: (p: string, _opts?: any): string => {
      const data = files.get(p);
      // Compare against undefined: an empty string is valid file content.
      if (data === undefined) {
        throw enoent(p);
      }
      return data;
    },
    readdirSync: (p: string): string[] => {
      const result: string[] = [];
      for (const f of files.keys()) {
        const slash = f.lastIndexOf("/");
        if (f.substring(0, slash) === p) {
          result.push(f.substring(slash + 1));
        }
      }
      return result;
    },
    existsSync: (p: string): boolean => {
      return files.has(p) || dirs.has(p);
    },
    statSync: (p: string): { mtimeMs: number; isFile: () => boolean } => {
      const data = files.get(p);
      if (data === undefined) {
        throw enoent(p);
      }
      // Use startedAt from JSON for deterministic age tests
      let mtimeMs = Number.NaN;
      try {
        const record = JSON.parse(data) as JobRecord;
        mtimeMs = new Date(record.startedAt).getTime();
      } catch {
        // Not JSON, or not an object: handled by the fallback below.
      }
      if (!Number.isFinite(mtimeMs)) {
        mtimeMs = Date.now();
      }
      return { mtimeMs, isFile: () => true };
    },
    unlinkSync: (p: string) => {
      files.delete(p);
    },
    __files: files,
    __dirs: dirs,
  };
  return fs;
}
/**
 * Produces a `spawn` replacement backed by a scenario table.
 *
 * The lookup key is the command and its arguments joined by single spaces.
 * A command line with no matching scenario yields a child that reports a
 * "spawn ENOENT" error, as if the binary were missing.
 *
 * @param scenarios - Scenario table keyed by the full command line.
 * @returns A function with the `(cmd, args, opts)` shape of `spawn`.
 */
function mockSpawn(scenarios: Map<string, MockScenario>): any {
  return (cmd: string, args: string[], _opts: any): any => {
    const commandLine = [cmd, ...args].join(" ");
    const matched = scenarios.get(commandLine);
    if (matched) {
      return createMockChildProcess(matched);
    }
    // Unknown command line: behave like a binary that is not installed.
    const missingBinary = Object.assign(new Error("spawn ENOENT"), { code: "ENOENT" });
    return createMockChildProcess({ error: missingBinary });
  };
}
/**
 * Creates a mock spawnWatcher that simulates the job-watcher.ts behavior:
 * spawns the mock child, captures output, and writes the final job record.
 *
 * The returned function:
 * - looks up the scenario by `command` plus `clientArgs` joined with spaces,
 *   treating an unknown command line as a "spawn ENOENT" error scenario;
 * - immediately rewrites the job file with the mock child's pid;
 * - on child `close`, records stdout/stderr, `completedAt`, a `result`, and a
 *   status of "completed" (exit code 0 or null) or "failed";
 * - on child `error`, records status "failed" with the error message;
 * - for `hang` scenarios, marks a still-"running" job "timed_out" after
 *   `opts.watcherTimeoutMs` (default 30 ms).
 *
 * A missing or unparsable job file makes each of these updates a silent no-op.
 *
 * The simulated timeout timer is unref'd so that a long `watcherTimeoutMs`
 * does not keep the test process alive after the tests have finished. Tests
 * that want to observe the timeout must keep the event loop alive themselves,
 * for example by awaiting a delay.
 *
 * @param scenarios - Scenario table keyed by the full command line.
 * @param fs - In-memory fs holding the job files.
 * @param opts - Optional `watcherTimeoutMs` for hang scenarios.
 * @returns A spawnWatcher function returning a handle with a fixed pid (99999)
 *   and a no-op `unref`.
 */
function createMockWatcher(
  scenarios: Map<string, MockScenario>,
  fs: ReturnType<typeof createMockFs>,
  opts?: { watcherTimeoutMs?: number }
): (jobFilePath: string, command: string, args: string[]) => { pid: number; unref: () => void } {
  return (jobFilePath: string, command: string, clientArgs: string[]) => {
    const key = [command, ...clientArgs].join(" ");
    const scenario = scenarios.get(key) ?? { error: Object.assign(new Error("spawn ENOENT"), { code: "ENOENT" }) };
    const child = createMockChildProcess(scenario);
    const watcherPid = 99999;
    let stdout = "";
    let stderr = "";
    child.stdout?.on("data", (chunk: Buffer | string) => { stdout += chunk.toString(); });
    child.stderr?.on("data", (chunk: Buffer | string) => { stderr += chunk.toString(); });
    child.on("close", (code: number | null) => {
      // Read current record, update with results
      let record: JobRecord;
      try {
        record = JSON.parse(fs.readFileSync(jobFilePath)) as JobRecord;
      } catch {
        return;
      }
      const durationMs = Date.now() - new Date(record.startedAt).getTime();
      const status = code === 0 || code === null ? "completed" : "failed";
      record.status = status;
      record.stdout = stdout;
      record.stderr = stderr;
      record.completedAt = new Date().toISOString();
      record.result = { stdout, stderr, exitCode: code ?? 0, client: record.client, durationMs };
      if (scenario.error) {
        record.status = "failed";
        record.error = scenario.error.message;
      }
      fs.writeFileSync(jobFilePath, JSON.stringify(record));
    });
    child.on("error", (err: NodeJS.ErrnoException) => {
      let record: JobRecord;
      try {
        record = JSON.parse(fs.readFileSync(jobFilePath)) as JobRecord;
      } catch {
        return;
      }
      record.status = "failed";
      record.error = err.message;
      record.completedAt = new Date().toISOString();
      fs.writeFileSync(jobFilePath, JSON.stringify(record));
    });
    // For hang scenarios, simulate a timeout by writing timed_out after a delay
    if (scenario.hang) {
      const watcherTimeout = opts?.watcherTimeoutMs ?? 30;
      const timeoutTimer = setTimeout(() => {
        try {
          const existing = JSON.parse(fs.readFileSync(jobFilePath)) as JobRecord;
          // Don't overwrite if already cancelled/completed
          if (existing.status !== "running") return;
          existing.status = "timed_out";
          existing.completedAt = new Date().toISOString();
          existing.result = { stdout, stderr, exitCode: -1, client: existing.client, durationMs: watcherTimeout };
          fs.writeFileSync(jobFilePath, JSON.stringify(existing));
        } catch { /* ignore */ }
      }, watcherTimeout);
      // Do not let a pending simulated timeout hold the process open.
      timeoutTimer.unref();
    }
    // Simulate watcher updating the PID in the job file
    try {
      const record = JSON.parse(fs.readFileSync(jobFilePath)) as JobRecord;
      record.pid = child.pid;
      fs.writeFileSync(jobFilePath, JSON.stringify(record));
    } catch { /* ignore */ }
    return { pid: watcherPid, unref: () => {} };
  };
}
/**
 * Bundles a fresh in-memory fs with spawn and spawnWatcher mocks that share
 * the same scenario table.
 *
 * @param scenarios - Scenario table keyed by the full command line.
 * @param jobDir - Job directory path, passed through unchanged.
 * @returns `{ fs, spawn, spawnWatcher, jobDir }`.
 */
function createJobTestHelper(scenarios: Map<string, MockScenario>, jobDir: string) {
  const fs = createMockFs();
  const watcher = createMockWatcher(scenarios, fs);
  return {
    fs,
    spawn: mockSpawn(scenarios),
    spawnWatcher: watcher,
    jobDir,
  };
}
/**
 * Loads and parses the job record stored at `path` in the mock fs.
 *
 * @throws The mock fs ENOENT error when the file is missing, or a SyntaxError
 *   when its content is not valid JSON.
 */
function readJobRecord(fs: ReturnType<typeof createMockFs>, path: string): JobRecord {
  const raw = fs.readFileSync(path);
  const parsed: JobRecord = JSON.parse(raw);
  return parsed;
}
/** Resolves with no value after roughly `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
// startJob: returns a running job immediately; the (mock) watcher finalizes
// the job file afterwards.
describe("startJob", () => {
  const jobDir = "/tmp/jobs";
  const recordPath = (id: string): string => `${jobDir}/${id}.json`;

  // Starts a codex job wired to the given scenarios and in-memory fs.
  const launch = (
    prompt: string,
    scenarios: Map<string, MockScenario>,
    fs: ReturnType<typeof createMockFs>,
    extra: { timeoutMs?: number } = {}
  ) =>
    startJob("codex", prompt, {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher: createMockWatcher(scenarios, fs),
      fs,
      ...extra,
    });

  it("spawns a detached child process and returns a running Job", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo hello world", { stdout: "ok\n", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    const job = await launch("hello world", scenarios, fs);

    // The handle returned to the caller describes a job still in flight.
    assert.strictEqual(job.client, "codex");
    assert.strictEqual(job.prompt, "hello world");
    assert.strictEqual(job.status, "running");
    assert.strictEqual(typeof job.id, "string");
    assert.ok(job.id.length > 0);
    assert.strictEqual(typeof job.pid, "number");
    assert.ok(typeof job.startedAt === "string");

    // Give the mock child time to exit and the watcher time to finalize.
    await delay(50);
    const finalRecord = readJobRecord(fs, recordPath(job.id));
    assert.strictEqual(finalRecord.status, "completed");
    assert.strictEqual(finalRecord.stdout, "ok\n");
    assert.strictEqual(finalRecord.result?.exitCode, 0);
    assert.ok(typeof finalRecord.completedAt === "string");
  });

  it("generates unique ids for concurrent jobs", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo a", { stdout: "a", exitCode: 0 }],
      ["codex exec --yolo b", { stdout: "b", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    const first = await launch("a", scenarios, fs);
    const second = await launch("b", scenarios, fs);

    assert.notStrictEqual(first.id, second.id);
  });

  it("sets status to timed_out when timeout is exceeded", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo slow", { hang: true }],
    ]);
    const fs = createMockFs();

    const job = await launch("slow", scenarios, fs, { timeoutMs: 20 });

    // Long enough for the simulated watcher timeout to fire and be written.
    await delay(100);
    const finalRecord = readJobRecord(fs, recordPath(job.id));
    assert.strictEqual(finalRecord.status, "timed_out");
  });

  it("sets status to failed on non-zero exit code", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo fail", { stdout: "", stderr: "err", exitCode: 1 }],
    ]);
    const fs = createMockFs();

    const job = await launch("fail", scenarios, fs);
    await delay(50);

    const finalRecord = readJobRecord(fs, recordPath(job.id));
    assert.strictEqual(finalRecord.status, "failed");
    assert.strictEqual(finalRecord.result?.exitCode, 1);
    assert.strictEqual(finalRecord.stderr, "err");
  });

  it("sets status to failed on spawn ENOENT", async () => {
    // An empty scenario table makes every command line look uninstalled.
    const scenarios = new Map<string, MockScenario>();
    const fs = createMockFs();

    const job = await launch("hello", scenarios, fs);
    await delay(50);

    const finalRecord = readJobRecord(fs, recordPath(job.id));
    assert.strictEqual(finalRecord.status, "failed");
    assert.ok(finalRecord.error);
  });
});
// getJob: reads the persisted job state back from the job directory.
describe("getJob", () => {
  const jobDir = "/tmp/jobs";

  it("returns the current Job state from disk", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo hello", { hang: true }],
    ]);
    const fs = createMockFs();
    // A 5 s watcher timeout keeps the hung job "running" for the whole test.
    const spawnWatcher = createMockWatcher(scenarios, fs, { watcherTimeoutMs: 5000 });
    const job = await startJob("codex", "hello", {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher,
      fs,
    });

    const immediately = getJob(job.id, { jobDir, fs });
    assert.strictEqual(immediately.id, job.id);
    assert.strictEqual(immediately.status, "running");

    await delay(50);

    // Job should still be running since watcher timeout is 5s
    const later = getJob(job.id, { jobDir, fs });
    assert.strictEqual(later.status, "running");
  });

  it("throws JobNotFoundError for nonexistent job", () => {
    const fs = createMockFs();
    const lookup = () => getJob("missing", { jobDir: "/tmp/jobs", fs });
    assert.throws(lookup, (err: unknown) => err instanceof JobNotFoundError);
  });
});
// getJobResult: yields the result only for completed jobs; every other state
// surfaces as a typed error.
describe("getJobResult", () => {
  const jobDir = "/tmp/jobs";

  // Starts a codex job wired to the given scenarios and in-memory fs.
  const launch = (
    prompt: string,
    scenarios: Map<string, MockScenario>,
    fs: ReturnType<typeof createMockFs>,
    extra: { timeoutMs?: number } = {}
  ) =>
    startJob("codex", prompt, {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher: createMockWatcher(scenarios, fs),
      fs,
      ...extra,
    });

  const isResultUnavailable = (err: unknown): boolean =>
    err instanceof JobResultUnavailableError;

  it("returns ExecResult when job is completed", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo hello", { stdout: "ok", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    const job = await launch("hello", scenarios, fs);
    await delay(50);

    const result = getJobResult(job.id, { jobDir, fs });
    assert.strictEqual(result.stdout, "ok");
    assert.strictEqual(result.exitCode, 0);
  });

  it("throws JobResultUnavailableError when job is still running", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo hello", { hang: true }],
    ]);
    const fs = createMockFs();

    const job = await launch("hello", scenarios, fs, { timeoutMs: 50 });

    // Queried right away, before the simulated watcher timeout can fire.
    assert.throws(() => getJobResult(job.id, { jobDir, fs }), isResultUnavailable);
  });

  it("throws JobResultUnavailableError when job failed", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo fail", { stdout: "", stderr: "err", exitCode: 1 }],
    ]);
    const fs = createMockFs();

    const job = await launch("fail", scenarios, fs);
    await delay(50);

    assert.throws(() => getJobResult(job.id, { jobDir, fs }), isResultUnavailable);
  });

  it("throws JobNotFoundError for nonexistent job", () => {
    const fs = createMockFs();
    const lookup = () => getJobResult("missing", { jobDir: "/tmp/jobs", fs });
    assert.throws(lookup, (err: unknown) => err instanceof JobNotFoundError);
  });
});
// cancelJob: marks a running job as cancelled and leaves finished jobs alone.
describe("cancelJob", () => {
  const jobDir = "/tmp/jobs";

  it("sends SIGTERM and updates status to cancelled", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo hello", { hang: true }],
    ]);
    const fs = createMockFs();
    // Long watcher timeout so the job cannot flip to timed_out mid-test.
    const spawnWatcher = createMockWatcher(scenarios, fs, { watcherTimeoutMs: 5000 });
    const job = await startJob("codex", "hello", {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher,
      fs,
    });

    // NOTE(review): the mock watcher stores the fake child's pid (12345) in
    // the job file. If cancelJob signals that pid through the real
    // process.kill, this test touches whatever process owns it. Confirm
    // cancelJob tolerates that or takes an injectable kill.
    cancelJob(job.id, { jobDir, fs });
    await delay(50);

    const afterCancel = readJobRecord(fs, `${jobDir}/${job.id}.json`);
    assert.strictEqual(afterCancel.status, "cancelled");
  });

  it("throws JobNotFoundError for nonexistent job", () => {
    const fs = createMockFs();
    const cancelMissing = () => cancelJob("missing", { jobDir: "/tmp/jobs", fs });
    assert.throws(cancelMissing, (err: unknown) => err instanceof JobNotFoundError);
  });

  it("is a no-op when job is not running", () => {
    const fs = createMockFs();
    const timestamp = new Date().toISOString();
    const recordFile = `${jobDir}/job-completed.json`;

    // Seed an already-finished job directly; no process is involved.
    const finished: JobRecord = {
      id: "job-completed",
      client: "codex",
      prompt: "done",
      status: "completed",
      startedAt: timestamp,
      completedAt: timestamp,
      stdout: "ok",
      stderr: "",
      result: {
        stdout: "ok",
        stderr: "",
        exitCode: 0,
        client: "codex",
        durationMs: 100,
      },
    };
    fs.mkdirSync(jobDir, { recursive: true });
    fs.writeFileSync(recordFile, JSON.stringify(finished));

    cancelJob("job-completed", { jobDir, fs });

    const untouched = readJobRecord(fs, recordFile);
    assert.strictEqual(untouched.status, "completed");
  });
});
// listJobs: enumerates job files, newest first, optionally filtered by status.
describe("listJobs", () => {
  const jobDir = "/tmp/jobs";

  // Starts a codex job wired to the given scenarios and in-memory fs.
  const launch = (
    prompt: string,
    scenarios: Map<string, MockScenario>,
    fs: ReturnType<typeof createMockFs>
  ) =>
    startJob("codex", prompt, {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher: createMockWatcher(scenarios, fs),
      fs,
    });

  // Creates the job directory and writes each record to `<jobDir>/<id>.json`.
  const seed = (fs: ReturnType<typeof createMockFs>, ...records: JobRecord[]): void => {
    fs.mkdirSync(jobDir, { recursive: true });
    for (const record of records) {
      fs.writeFileSync(`${jobDir}/${record.id}.json`, JSON.stringify(record));
    }
  };

  it("returns all jobs sorted by startedAt desc", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo a", { stdout: "a", exitCode: 0 }],
      ["codex exec --yolo b", { stdout: "b", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    const older = await launch("a", scenarios, fs);
    // Space the two jobs apart so their startedAt values differ.
    await delay(20);
    const newer = await launch("b", scenarios, fs);
    await delay(50);

    const jobs = listJobs({ jobDir, fs });
    assert.strictEqual(jobs.length, 2);
    assert.strictEqual(jobs[0].id, newer.id);
    assert.strictEqual(jobs[1].id, older.id);
  });

  it("returns empty array when jobDir does not exist", () => {
    const fs = createMockFs();
    assert.deepStrictEqual(listJobs({ jobDir: "/tmp/jobs", fs }), []);
  });

  it("ignores corrupt or unreadable job files", () => {
    const fs = createMockFs();
    const timestamp = new Date().toISOString();
    const validRecord: JobRecord = {
      id: "job-valid",
      client: "codex",
      prompt: "ok",
      status: "completed",
      startedAt: timestamp,
      completedAt: timestamp,
      stdout: "ok",
      stderr: "",
      result: {
        stdout: "ok",
        stderr: "",
        exitCode: 0,
        client: "codex",
        durationMs: 100,
      },
    };
    seed(fs, validRecord);
    // A file whose content is not JSON sits next to the valid record.
    fs.writeFileSync(`${jobDir}/job-corrupt.json`, "not-json");

    const jobs = listJobs({ jobDir, fs });
    assert.strictEqual(jobs.length, 1);
    assert.strictEqual(jobs[0].id, "job-valid");
  });

  it("filters jobs by status when filter is provided", () => {
    const fs = createMockFs();
    const timestamp = new Date().toISOString();
    const runningRecord: JobRecord = {
      id: "job-running",
      client: "codex",
      prompt: "run",
      status: "running",
      startedAt: timestamp,
      stdout: "",
      stderr: "",
    };
    const completedRecord: JobRecord = {
      id: "job-completed",
      client: "claude",
      prompt: "done",
      status: "completed",
      startedAt: timestamp,
      completedAt: timestamp,
      stdout: "ok",
      stderr: "",
      result: {
        stdout: "ok",
        stderr: "",
        exitCode: 0,
        client: "claude",
        durationMs: 100,
      },
    };
    const failedRecord: JobRecord = {
      id: "job-failed",
      client: "opencode",
      prompt: "fail",
      status: "failed",
      startedAt: timestamp,
      completedAt: timestamp,
      stdout: "",
      stderr: "err",
      result: {
        stdout: "",
        stderr: "err",
        exitCode: 1,
        client: "opencode",
        durationMs: 100,
      },
    };
    seed(fs, runningRecord, completedRecord, failedRecord);

    const everything = listJobs({ jobDir, fs });
    assert.strictEqual(everything.length, 3);

    // Each status filter must select exactly the one matching record.
    const expectations = [
      ["running", "job-running"],
      ["completed", "job-completed"],
      ["failed", "job-failed"],
    ] as const;
    for (const [filter, expectedId] of expectations) {
      const matches = listJobs({ jobDir, fs, filter });
      assert.strictEqual(matches.length, 1);
      assert.strictEqual(matches[0].id, expectedId);
    }
  });
});
// cleanupJobs: removes job files whose age exceeds maxAgeMs. The mock fs
// reports a file's mtime from the record's startedAt, so rewriting startedAt
// is how these tests age a job.
describe("cleanupJobs", () => {
  const jobDir = "/tmp/jobs";
  const recordPath = (id: string): string => `${jobDir}/${id}.json`;

  // Starts a codex job wired to the given scenarios and in-memory fs.
  const launch = (
    prompt: string,
    scenarios: Map<string, MockScenario>,
    fs: ReturnType<typeof createMockFs>
  ) =>
    startJob("codex", prompt, {
      jobDir,
      spawn: mockSpawn(scenarios),
      spawnWatcher: createMockWatcher(scenarios, fs),
      fs,
    });

  // Rewrites the record at `path` so it appears to have started 25 hours ago.
  const backdate = (fs: ReturnType<typeof createMockFs>, path: string): void => {
    const record = readJobRecord(fs, path);
    record.startedAt = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString();
    fs.writeFileSync(path, JSON.stringify(record));
  };

  it("deletes job files older than maxAgeMs", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo old", { stdout: "old", exitCode: 0 }],
      ["codex exec --yolo new", { stdout: "new", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    // Both jobs run to completion before either record is touched.
    const oldJob = await launch("old", scenarios, fs);
    await delay(50);
    const newJob = await launch("new", scenarios, fs);
    await delay(50);

    // Make old job appear 25h old by patching its record
    const oldPath = recordPath(oldJob.id);
    backdate(fs, oldPath);

    cleanupJobs({ jobDir, fs, maxAgeMs: 24 * 60 * 60 * 1000 });

    assert.strictEqual(fs.existsSync(oldPath), false);
    assert.strictEqual(fs.existsSync(recordPath(newJob.id)), true);
  });

  it("uses default maxAge of 24 hours", async () => {
    const scenarios = new Map<string, MockScenario>([
      ["codex exec --yolo old", { stdout: "old", exitCode: 0 }],
    ]);
    const fs = createMockFs();

    const job = await launch("old", scenarios, fs);
    await delay(50);

    const path = recordPath(job.id);
    backdate(fs, path);

    // No maxAgeMs given: a 25-hour-old record must still be removed.
    cleanupJobs({ jobDir, fs });
    assert.strictEqual(fs.existsSync(path), false);
  });

  it("is a no-op when jobDir does not exist", () => {
    const fs = createMockFs();
    // Should not throw
    cleanupJobs({ jobDir: "/tmp/jobs", fs });
    assert.strictEqual(fs.existsSync("/tmp/jobs"), false);
  });
});