fix: filter codex ReasoningSummary stderr noise on exit code 0

Codex writes informational ERROR messages about ReasoningSummaryDelta
to stderr even on successful execution (exit code 0). The OpenClaw
agent misinterprets this non-empty stderr as a failure.

- Add filterStderrNoise() to strip known codex noise patterns from
  stderr when exit code is 0
- Preserve raw stderr in DebugInfo.rawStderr when --debug (or an onDebug
  callback) is active and filtering actually changed the output
- Add 5 new tests covering noise filtering, preservation on failure,
  debug raw output, and non-codex client passthrough
This commit is contained in:
2026-05-20 13:37:21 -05:00
parent edb6611b74
commit afac143cb3
3 changed files with 110 additions and 3 deletions
+29 -3
View File
@@ -9,6 +9,28 @@ export const CLIENT_ARGS: Record<ClientName, (prompt: string) => string[]> = {
opencode: (p) => ["run", "--dangerously-skip-permissions", p], opencode: (p) => ["run", "--dangerously-skip-permissions", p],
}; };
/**
 * Known stderr noise patterns per client.
 *
 * Each pattern is tested against a single stderr line. When the process exit
 * code is 0, lines matching any pattern registered for the client are stripped
 * from the returned stderr so that agents do not misinterpret informational
 * diagnostics as errors. Callers that need the unfiltered text (for example to
 * populate DebugInfo.rawStderr) must keep their own copy of the raw stderr.
 *
 * Patterns must NOT carry the `g` or `y` flag: `RegExp.prototype.test` is
 * stateful for such regexes and would skip matches across lines.
 */
const STDERR_NOISE_PATTERNS: Partial<Record<ClientName, RegExp[]>> = {
  codex: [
    // e.g. "2026-05-20T18:33:03.281713Z ERROR codex_core::util: ReasoningSummaryDelta without active item"
    /^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s+ERROR\s+codex_core::util:\s+ReasoningSummary\w*\s+/,
  ],
};

/**
 * Removes known-noise lines from a client's stderr after a successful run.
 *
 * @param client   Client whose noise patterns should be applied.
 * @param stderr   Raw stderr text captured from the process.
 * @param exitCode Process exit code; filtering only happens when it is 0.
 * @returns The input string unchanged when the exit code is non-zero, the
 *   client has no registered patterns, or no line matched. Otherwise the
 *   remaining lines joined with "\n", with trailing line terminators removed.
 */
function filterStderrNoise(client: ClientName, stderr: string, exitCode: number): string {
  // On failure every stderr line is potentially relevant; never hide anything.
  if (exitCode !== 0) return stderr;

  const patterns = STDERR_NOISE_PATTERNS[client];
  if (!patterns || patterns.length === 0) return stderr;

  const lines = stderr.split("\n");
  const kept = lines.filter((line) => !patterns.some((p) => p.test(line)));

  // Nothing matched: hand back the original text byte-for-byte. Normalizing
  // it here would make callers that compare raw vs. cleaned stderr believe
  // that noise had been filtered when it had not.
  if (kept.length === lines.length) return stderr;

  // Dropping lines can leave dangling terminators (LF or CRLF) at the end.
  return kept.join("\n").replace(/(?:\r?\n)+$/, "");
}
export async function executePrompt( export async function executePrompt(
client: ClientName, client: ClientName,
prompt: string, prompt: string,
@@ -78,6 +100,8 @@ export async function executePrompt(
settled = true; settled = true;
clearTimeout(timeout); clearTimeout(timeout);
const durationMs = Date.now() - startMs; const durationMs = Date.now() - startMs;
const rawStderr = stderr;
const cleanedStderr = filterStderrNoise(client, rawStderr, result?.exitCode ?? -1);
if (options.debug || options.onDebug) { if (options.debug || options.onDebug) {
const effectiveExitCode = result?.exitCode ?? (err instanceof ExecError ? err.result.exitCode : null); const effectiveExitCode = result?.exitCode ?? (err instanceof ExecError ? err.result.exitCode : null);
const debugInfo: DebugInfo = { const debugInfo: DebugInfo = {
@@ -87,20 +111,22 @@ export async function executePrompt(
exitCode: effectiveExitCode, exitCode: effectiveExitCode,
exitSignal, exitSignal,
durationMs, durationMs,
stderrLength: stderr.length, stderrLength: rawStderr.length,
stdoutLength: stdout.length, stdoutLength: stdout.length,
noisySuccess: effectiveExitCode === 0 && stderr.length > 0, noisySuccess: effectiveExitCode === 0 && rawStderr.length > 0,
rawStderr: rawStderr !== cleanedStderr ? rawStderr : undefined,
}; };
options.onDebug?.(debugInfo); options.onDebug?.(debugInfo);
} }
if (err) { if (err) {
if (err instanceof ExecError) { if (err instanceof ExecError) {
err.result.stderr = cleanedStderr;
err.result.client = client; err.result.client = client;
err.result.durationMs = durationMs; err.result.durationMs = durationMs;
} }
reject(err); reject(err);
} else { } else {
resolve({ ...result!, client, durationMs }); resolve({ ...result!, client, durationMs, stderr: cleanedStderr });
} }
} }
+2
View File
@@ -28,6 +28,8 @@ export interface DebugInfo {
stderrLength: number; stderrLength: number;
stdoutLength: number; stdoutLength: number;
noisySuccess: boolean; noisySuccess: boolean;
/** Unfiltered stderr before noise removal (only present when --debug). */
rawStderr?: string;
} }
export interface ExecuteOptions { export interface ExecuteOptions {
@@ -362,4 +362,83 @@ describe("executePrompt", () => {
}); });
assert.strictEqual(debugInfos[0].noisySuccess, false); assert.strictEqual(debugInfos[0].noisySuccess, false);
}); });
it("filters codex ReasoningSummary noise from stderr on exit code 0", async () => {
  // Three informational lines in the shape the codex noise filter targets.
  const noiseLines = [
    '2026-05-20T18:33:01.969310Z ERROR codex_core::util: ReasoningSummaryPartAdded without active item',
    '2026-05-20T18:33:03.281713Z ERROR codex_core::util: ReasoningSummaryDelta without active item',
    '2026-05-20T18:33:03.348247Z ERROR codex_core::util: ReasoningSummaryDelta without active item',
  ];

  const scenarios = new Map<string, MockScenario>();
  scenarios.set("codex exec --yolo hello", {
    stdout: "Hello world!",
    stderr: noiseLines.join('\n'),
    exitCode: 0,
  });

  const outcome = await executePrompt("codex", "hello", {
    spawn: mockSpawn(scenarios),
    existsSync: () => true,
  });

  // A successful run with only noise on stderr must surface an empty stderr.
  assert.strictEqual(outcome.exitCode, 0);
  assert.strictEqual(outcome.stderr, "");
  assert.strictEqual(outcome.stdout, "Hello world!");
});
it("preserves real error stderr from codex on non-zero exit code", async () => {
  // A noise line followed by a genuine error message.
  const noiseLine =
    '2026-05-20T18:33:01.969310Z ERROR codex_core::util: ReasoningSummaryDelta without active item';
  const realError = 'Error: something actually went wrong';
  const failingStderr = `${noiseLine}\n${realError}`;

  const scenarios = new Map<string, MockScenario>();
  scenarios.set("codex exec --yolo fail", { stdout: "", stderr: failingStderr, exitCode: 1 });

  const outcome = await executePrompt("codex", "fail", {
    spawn: mockSpawn(scenarios),
    existsSync: () => true,
  });

  // On failure nothing is filtered: both the noise and the real error remain.
  assert.strictEqual(outcome.exitCode, 1);
  assert.ok(outcome.stderr.includes("ReasoningSummaryDelta"));
  assert.ok(outcome.stderr.includes("something actually went wrong"));
});
it("provides rawStderr in debug info when noise is filtered", async () => {
  // Single noise line, newline-terminated.
  const rawNoise =
    '2026-05-20T18:33:01.969310Z ERROR codex_core::util: ReasoningSummaryDelta without active item\n';

  const scenarios = new Map<string, MockScenario>();
  scenarios.set("codex exec --yolo hello", { stdout: "ok", stderr: rawNoise, exitCode: 0 });

  // Collects every DebugInfo payload handed to the onDebug callback.
  const captured: any[] = [];
  const outcome = await executePrompt("codex", "hello", {
    spawn: mockSpawn(scenarios),
    existsSync: () => true,
    debug: true,
    onDebug: (info) => {
      captured.push(info);
    },
  });

  // The result carries the cleaned stderr; the debug payload keeps the original.
  assert.strictEqual(outcome.exitCode, 0);
  assert.strictEqual(outcome.stderr, "");
  assert.strictEqual(captured[0].rawStderr, rawNoise);
});
it("does not set rawStderr when no noise filtering occurred", async () => {
  // Empty stderr: there is nothing for the filter to remove.
  const scenarios = new Map<string, MockScenario>();
  scenarios.set("codex exec --yolo hello", { stdout: "ok", stderr: "", exitCode: 0 });

  // Collects every DebugInfo payload handed to the onDebug callback.
  const captured: any[] = [];
  await executePrompt("codex", "hello", {
    spawn: mockSpawn(scenarios),
    existsSync: () => true,
    debug: true,
    onDebug: (info) => {
      captured.push(info);
    },
  });

  // With raw and cleaned stderr identical, rawStderr is left unset.
  assert.strictEqual(captured[0].rawStderr, undefined);
});
it("does not filter stderr for non-codex clients", async () => {
  // Codex-shaped noise emitted by a different client must pass through untouched.
  const codexLikeNoise =
    '2026-05-20T18:33:01.969310Z ERROR codex_core::util: ReasoningSummaryDelta without active item\n';

  const scenarios = new Map<string, MockScenario>();
  scenarios.set("claude -p hello --dangerously-skip-permissions", {
    stdout: "ok",
    stderr: codexLikeNoise,
    exitCode: 0,
  });

  const outcome = await executePrompt("claude", "hello", {
    spawn: mockSpawn(scenarios),
    existsSync: () => true,
  });

  assert.strictEqual(outcome.exitCode, 0);
  assert.strictEqual(outcome.stderr, codexLikeNoise);
});
}); });