33c898ff9a
The async startJob previously relied on Node.js event listeners in the CLI process to capture child output and finalize the job file. However, the CLI process exits immediately after returning the job ID, which tears down the event loop before the close handler fires and leaves jobs stuck at 'running' forever.

Fix: startJob now spawns a companion watcher process (job-watcher.ts) that is itself detached and outlives the CLI. The watcher:
- Spawns the actual client (codex/claude/opencode)
- Captures stdout/stderr
- Writes the final job record to disk on child exit
- Has its own 10-minute timeout safety net

The CLI returns the job ID immediately and no longer needs to stay alive; the watcher finalizes the job independently.

Also updates the tests to mock the watcher spawn via the injectable spawnWatcher option.
288 lines
8.4 KiB
TypeScript
288 lines
8.4 KiB
TypeScript
import { spawn as defaultSpawn } from "node:child_process";
import { randomUUID } from "node:crypto";
import { existsSync as defaultExistsSync, mkdirSync as defaultMkdirSync, readFileSync as defaultReadFileSync, readdirSync as defaultReaddirSync, statSync as defaultStatSync, unlinkSync as defaultUnlinkSync, writeFileSync as defaultWriteFileSync } from "node:fs";
import { homedir } from "node:os";
import { fileURLToPath } from "node:url";

import { CLIENT_ARGS } from "./execute.js";
import type { ClientName, Job, JobRecord, JobStartOptions, ExecResult, JobStatus } from "./types.js";
import { JobNotFoundError, JobResultUnavailableError } from "./types.js";
|
|
|
|
/**
 * Options accepted by every job operation in this module.
 */
export interface JobOperationsOptions {
  /**
   * Directory that holds the per-job `<id>.json` files. When omitted, the
   * module-level default job directory is used.
   */
  jobDir?: string;

  /**
   * Substitute implementations of the synchronous `node:fs` functions this
   * module relies on. When omitted, the real `node:fs` functions are used.
   */
  fs?: {
    // Writing job files
    mkdirSync: typeof defaultMkdirSync;
    writeFileSync: typeof defaultWriteFileSync;
    // Reading and enumerating job files
    readFileSync: typeof defaultReadFileSync;
    readdirSync: typeof defaultReaddirSync;
    existsSync: typeof defaultExistsSync;
    // Age-based cleanup
    statSync: typeof defaultStatSync;
    unlinkSync: typeof defaultUnlinkSync;
  };
}
|
|
|
|
/**
 * Options for `startJob`: the job start options combined with the shared
 * job-operation options, plus an optional replacement for the watcher spawn.
 */
export interface StartJobOptions extends JobStartOptions, JobOperationsOptions {
  /**
   * Override the watcher spawn (for testing). When provided, startJob calls
   * this instead of spawning `node --import tsx job-watcher.ts ...`.
   *
   * @param jobFilePath - Path of the JSON file that stores the job record.
   * @param command - Executable that would be run for the client.
   * @param args - Arguments that would be passed to that executable.
   * @returns A handle whose `pid` startJob falls back to when the job file
   *   carries no PID.
   */
  spawnWatcher?: (
    jobFilePath: string,
    command: string,
    args: string[],
  ) => { pid?: number; unref?: () => void };
}
|
|
|
|
/**
 * Default directory for job files: `<home>/.openclaw/ai-cli-dispatch/jobs`.
 *
 * The home directory is taken from `HOME`, then `USERPROFILE`, and finally
 * `os.homedir()`, so the path can no longer begin with the literal string
 * "undefined" when neither environment variable is set.
 */
const DEFAULT_JOB_DIR = `${process.env.HOME || process.env.USERPROFILE || homedir()}/.openclaw/ai-cli-dispatch/jobs`;
|
|
|
|
/**
 * Resolves the directory in which job files live.
 *
 * @param options - Optional bag whose `jobDir`, when not null/undefined, wins.
 * @returns `options.jobDir` if given, otherwise `DEFAULT_JOB_DIR`.
 */
function getJobDir(options?: { jobDir?: string }): string {
  const requested = options?.jobDir;
  if (requested == null) {
    return DEFAULT_JOB_DIR;
  }
  return requested;
}
|
|
|
|
/**
 * Persists a job record as pretty-printed JSON at `<jobDir>/<record.id>.json`.
 *
 * @param jobDir - Directory for job files; created (recursively) if needed.
 * @param record - Record to serialize; its `id` determines the file name.
 * @param fs - File-system functions used for the directory and file writes.
 */
function writeJobFile(
  jobDir: string,
  record: JobRecord,
  fs: NonNullable<JobOperationsOptions["fs"]>
): void {
  // Ensure the directory exists before anything else is evaluated.
  fs.mkdirSync(jobDir, { recursive: true });

  const targetPath = `${jobDir}/${record.id}.json`;
  const payload = JSON.stringify(record, null, 2);
  fs.writeFileSync(targetPath, payload);
}
|
|
|
|
/**
 * Loads and parses the job record stored at `<jobDir>/<jobId>.json`.
 *
 * @param jobId - Identifier of the job; also the file's base name.
 * @param jobDir - Directory containing the job files.
 * @param fs - File-system functions used to read the file.
 * @returns The parsed record. The JSON is not validated against `JobRecord`.
 * @throws JobNotFoundError when reading fails with error code `ENOENT`.
 *   Every other failure (including a JSON syntax error) is rethrown unchanged.
 */
function readJobFile(
  jobId: string,
  jobDir: string,
  fs: NonNullable<JobOperationsOptions["fs"]>
): JobRecord {
  const path = `${jobDir}/${jobId}.json`;
  try {
    return JSON.parse(fs.readFileSync(path, "utf-8")) as JobRecord;
  } catch (err: unknown) {
    // Narrow before touching `.code`: a thrown value is not guaranteed to be
    // an object, and dereferencing null/undefined would mask the real error.
    const code =
      typeof err === "object" && err !== null && "code" in err
        ? (err as { code?: unknown }).code
        : undefined;
    if (code === "ENOENT") {
      throw new JobNotFoundError(jobId);
    }
    throw err;
  }
}
|
|
|
|
/**
 * Starts `client` on `prompt` as a background job and returns once the job
 * has been registered on disk and its watcher has been launched.
 *
 * Steps:
 * 1. Generate a job id and write a `"running"` record to `<jobDir>/<id>.json`.
 * 2. Launch the watcher: either `options.spawnWatcher` (if provided) or a
 *    detached, unref'd `node --import tsx job-watcher.ts <jobFile> <command> <args...>`
 *    process with stdio ignored.
 * 3. Wait 100 ms, then re-read the job file to pick up a PID the watcher may
 *    have recorded; fall back to the watcher handle's own `pid`.
 *
 * If `client` has no entry in `CLIENT_ARGS`, a `"failed"` record is written
 * and a failed job is returned without launching anything.
 *
 * @param client - Name of the client to run; selects the argument builder.
 * @param prompt - Prompt handed to the client's argument builder.
 * @param options - Job directory, fs/spawn overrides, `clientPath`, and the
 *   optional `spawnWatcher` hook.
 * @returns The job descriptor; `status` is `"running"` unless the client is
 *   unknown, in which case it is `"failed"` with an `error` message.
 */
export async function startJob(
  client: ClientName,
  prompt: string,
  options: StartJobOptions = {}
): Promise<Job> {
  const jobId = randomUUID();
  const jobDir = getJobDir(options);
  const fs = options.fs ?? {
    mkdirSync: defaultMkdirSync,
    writeFileSync: defaultWriteFileSync,
    readFileSync: defaultReadFileSync,
    readdirSync: defaultReaddirSync,
    existsSync: defaultExistsSync,
    statSync: defaultStatSync,
    unlinkSync: defaultUnlinkSync,
  };
  const spawnImpl = options.spawn ?? defaultSpawn;

  const argBuilder = (CLIENT_ARGS as Record<string, (prompt: string) => string[]>)[client];
  if (!argBuilder) {
    // Unknown client: record the failure so the job id is still queryable.
    const startedAt = new Date().toISOString();
    const errRecord: JobRecord = {
      id: jobId,
      client,
      prompt,
      status: "failed",
      startedAt,
      completedAt: startedAt,
      stdout: "",
      stderr: "",
      error: `Unknown client: ${client}`,
    };
    writeJobFile(jobDir, errRecord, fs);
    return { id: jobId, client, prompt, status: "failed", startedAt, error: errRecord.error };
  }

  const clientArgs = argBuilder(prompt);
  const command = options.clientPath ?? client;
  const startedAt = new Date().toISOString();
  const jobFilePath = `${jobDir}/${jobId}.json`;

  const record: JobRecord = {
    id: jobId,
    client,
    prompt,
    status: "running",
    startedAt,
    pid: undefined,
    stdout: "",
    stderr: "",
  };

  writeJobFile(jobDir, record, fs);

  // Spawn a companion watcher process that outlives this CLI invocation.
  // The watcher monitors the actual client (codex/claude/opencode), captures
  // stdout/stderr, and writes the final job record to disk on exit.
  // This allows the CLI to return the job ID immediately while the watcher
  // stays alive to finalize the job.
  let watcher: { pid?: number; unref?: () => void };

  if (options.spawnWatcher) {
    // Test path: use the injected watcher mock
    watcher = options.spawnWatcher(jobFilePath, command, clientArgs);
  } else {
    const watcherArgs = [
      "--import", "tsx",
      // fileURLToPath, not URL.pathname: pathname keeps percent-encoding
      // (e.g. "%20" for spaces) and yields "/C:/..." on Windows, neither of
      // which is a usable filesystem path.
      fileURLToPath(new URL("./job-watcher.ts", import.meta.url)),
      jobFilePath,
      command,
      ...clientArgs,
    ];
    watcher = spawnImpl("node", watcherArgs, {
      detached: true,
      shell: false,
      stdio: "ignore",
    });
    // Do not let the watcher keep this process's event loop alive.
    watcher.unref?.();
  }

  // Give the watcher a tick to spawn and record the real child PID
  await new Promise((r) => setTimeout(r, 100));

  // Re-read the job file to pick up the watcher's PID update
  let updatedRecord: JobRecord;
  try {
    updatedRecord = JSON.parse(fs.readFileSync(jobFilePath, "utf-8")) as JobRecord;
  } catch {
    // Unreadable or unparsable file: fall back to what was written above.
    updatedRecord = record;
  }

  return {
    id: jobId,
    client,
    prompt,
    status: "running",
    startedAt,
    pid: updatedRecord.pid ?? watcher.pid,
  };
}
|
|
|
|
/**
 * Looks up a job by id and returns its record without the captured output.
 *
 * @param jobId - Identifier of the job to load.
 * @param options - Job directory and file-system overrides.
 * @returns The stored record minus its `stdout` and `stderr` fields.
 * @throws Whatever `readJobFile` throws for a missing or unreadable job file.
 */
export function getJob(jobId: string, options: JobOperationsOptions = {}): Job {
  const directory = getJobDir(options);
  const fileSystem = options.fs ?? {
    existsSync: defaultExistsSync,
    mkdirSync: defaultMkdirSync,
    readdirSync: defaultReaddirSync,
    readFileSync: defaultReadFileSync,
    statSync: defaultStatSync,
    unlinkSync: defaultUnlinkSync,
    writeFileSync: defaultWriteFileSync,
  };

  // Strip the (potentially large) captured output; keep everything else.
  const { stdout: _capturedOut, stderr: _capturedErr, ...summary } = readJobFile(
    jobId,
    directory,
    fileSystem,
  );
  return summary;
}
|
|
|
|
/**
 * Returns the stored result of a completed job.
 *
 * @param jobId - Identifier of the job.
 * @param options - Job directory and file-system overrides.
 * @returns The job's `result` field.
 * @throws JobResultUnavailableError when the job's status is anything other
 *   than `"completed"`.
 */
export function getJobResult(jobId: string, options: JobOperationsOptions = {}): ExecResult {
  const finished = getJob(jobId, options);
  if (finished.status === "completed") {
    return finished.result!;
  }
  throw new JobResultUnavailableError(jobId, finished.status);
}
|
|
|
|
/**
 * Cancels a running job: sends SIGTERM to the recorded PID (if any) and
 * rewrites the job file with status `"cancelled"` and a completion timestamp.
 * Jobs whose status is not `"running"` are left untouched.
 *
 * @param jobId - Identifier of the job to cancel.
 * @param options - Job directory and file-system overrides.
 * @throws Whatever `readJobFile` throws for a missing or unreadable job file.
 */
export function cancelJob(jobId: string, options: JobOperationsOptions = {}): void {
  const directory = getJobDir(options);
  const fileSystem = options.fs ?? {
    existsSync: defaultExistsSync,
    mkdirSync: defaultMkdirSync,
    readdirSync: defaultReaddirSync,
    readFileSync: defaultReadFileSync,
    statSync: defaultStatSync,
    unlinkSync: defaultUnlinkSync,
    writeFileSync: defaultWriteFileSync,
  };

  const current = readJobFile(jobId, directory, fileSystem);
  if (current.status === "running") {
    // Kill the client child process (PID recorded by the watcher)
    if (current.pid) {
      try {
        process.kill(current.pid, "SIGTERM");
      } catch {
        // ignore — process may have already exited
      }
    }

    // Update the job file to cancelled
    writeJobFile(
      directory,
      { ...current, status: "cancelled", completedAt: new Date().toISOString() },
      fileSystem,
    );
  }
}
|
|
|
|
/**
 * Lists the jobs found in the job directory, newest `startedAt` first, with
 * the captured `stdout`/`stderr` removed from each entry.
 *
 * @param options - Job directory and file-system overrides, plus an optional
 *   `filter` that keeps only jobs with that status.
 * @returns The (optionally filtered) jobs; empty if the directory is absent.
 *   Job files that cannot be read or parsed are skipped.
 */
export function listJobs(options: JobOperationsOptions & { filter?: JobStatus } = {}): Job[] {
  const directory = getJobDir(options);
  const fileSystem = options.fs ?? {
    existsSync: defaultExistsSync,
    mkdirSync: defaultMkdirSync,
    readdirSync: defaultReaddirSync,
    readFileSync: defaultReadFileSync,
    statSync: defaultStatSync,
    unlinkSync: defaultUnlinkSync,
    writeFileSync: defaultWriteFileSync,
  };

  if (!fileSystem.existsSync(directory)) {
    return [];
  }

  const suffix = ".json";
  const collected: Job[] = [];
  for (const fileName of fileSystem.readdirSync(directory)) {
    if (!fileName.endsWith(suffix)) {
      continue;
    }
    // The name is known to end with the suffix, so slicing it off yields the id.
    const id = fileName.slice(0, fileName.length - suffix.length);
    try {
      const { stdout: _capturedOut, stderr: _capturedErr, ...summary } = readJobFile(
        id,
        directory,
        fileSystem,
      );
      collected.push(summary);
    } catch {
      // ignore corrupt/missing files
    }
  }

  // Newest first.
  const startedMs = (job: Job): number => new Date(job.startedAt).getTime();
  collected.sort((left, right) => startedMs(right) - startedMs(left));

  const wanted = options.filter;
  if (!wanted) {
    return collected;
  }
  return collected.filter((job) => job.status === wanted);
}
|
|
|
|
/**
 * Deletes job files whose modification time is older than `maxAgeMs`.
 *
 * @param options - Job directory and file-system overrides, plus `maxAgeMs`
 *   (defaults to 24 hours). Only `*.json` entries are considered.
 *   Errors while inspecting or deleting an individual file are ignored; a
 *   missing job directory is a no-op.
 */
export function cleanupJobs(options: JobOperationsOptions & { maxAgeMs?: number } = {}): void {
  const directory = getJobDir(options);
  const fileSystem = options.fs ?? {
    existsSync: defaultExistsSync,
    mkdirSync: defaultMkdirSync,
    readdirSync: defaultReaddirSync,
    readFileSync: defaultReadFileSync,
    statSync: defaultStatSync,
    unlinkSync: defaultUnlinkSync,
    writeFileSync: defaultWriteFileSync,
  };
  const oneDayMs = 24 * 60 * 60 * 1000;
  const maxAgeMs = options.maxAgeMs ?? oneDayMs;

  if (!fileSystem.existsSync(directory)) {
    return;
  }

  // Taken once so every file is measured against the same instant.
  const now = Date.now();

  for (const fileName of fileSystem.readdirSync(directory)) {
    if (!fileName.endsWith(".json")) {
      continue;
    }
    const filePath = `${directory}/${fileName}`;
    try {
      const ageMs = now - fileSystem.statSync(filePath).mtimeMs;
      if (ageMs > maxAgeMs) {
        fileSystem.unlinkSync(filePath);
      }
    } catch {
      // ignore
    }
  }
}
|