Compare commits
5 Commits
526101fd23
...
253a4f31e2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
253a4f31e2 | ||
|
|
c2d47487e0 | ||
|
|
04bf34544b | ||
|
|
41a3b9d1ee | ||
|
|
8720691135 |
@@ -13,6 +13,11 @@ Create structured implementation plans with milestone and story tracking, and op
|
||||
- For Codex, native skill discovery must be configured:
|
||||
- `~/.agents/skills/superpowers -> ~/.codex/superpowers/skills`
|
||||
- For Cursor, skills must be installed under `.cursor/skills/` (repo-local) or `~/.cursor/skills/` (global)
|
||||
- Shared reviewer runtime must be installed beside agent skills when using reviewer CLIs:
|
||||
- Codex: `~/.codex/skills/reviewer-runtime/run-review.sh`
|
||||
- Claude Code: `~/.claude/skills/reviewer-runtime/run-review.sh`
|
||||
- OpenCode: `~/.config/opencode/skills/reviewer-runtime/run-review.sh`
|
||||
- Cursor: `.cursor/skills/reviewer-runtime/run-review.sh` or `~/.cursor/skills/reviewer-runtime/run-review.sh`
|
||||
|
||||
If dependencies are missing, stop and return:
|
||||
|
||||
@@ -39,6 +44,8 @@ The reviewer CLI is independent of which agent is running the planning — e.g.,
|
||||
```bash
|
||||
mkdir -p ~/.codex/skills/create-plan
|
||||
cp -R skills/create-plan/codex/* ~/.codex/skills/create-plan/
|
||||
mkdir -p ~/.codex/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.codex/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
@@ -46,6 +53,8 @@ cp -R skills/create-plan/codex/* ~/.codex/skills/create-plan/
|
||||
```bash
|
||||
mkdir -p ~/.claude/skills/create-plan
|
||||
cp -R skills/create-plan/claude-code/* ~/.claude/skills/create-plan/
|
||||
mkdir -p ~/.claude/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.claude/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### OpenCode
|
||||
@@ -53,6 +62,8 @@ cp -R skills/create-plan/claude-code/* ~/.claude/skills/create-plan/
|
||||
```bash
|
||||
mkdir -p ~/.config/opencode/skills/create-plan
|
||||
cp -R skills/create-plan/opencode/* ~/.config/opencode/skills/create-plan/
|
||||
mkdir -p ~/.config/opencode/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.config/opencode/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### Cursor
|
||||
@@ -62,6 +73,8 @@ Copy into the repo-local `.cursor/skills/` directory (where the Cursor Agent CLI
|
||||
```bash
|
||||
mkdir -p .cursor/skills/create-plan
|
||||
cp -R skills/create-plan/cursor/* .cursor/skills/create-plan/
|
||||
mkdir -p .cursor/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* .cursor/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
Or install globally (loaded via `~/.cursor/skills/`):
|
||||
@@ -69,6 +82,8 @@ Or install globally (loaded via `~/.cursor/skills/`):
|
||||
```bash
|
||||
mkdir -p ~/.cursor/skills/create-plan
|
||||
cp -R skills/create-plan/cursor/* ~/.cursor/skills/create-plan/
|
||||
mkdir -p ~/.cursor/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.cursor/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
## Verify Installation
|
||||
@@ -78,6 +93,10 @@ test -f ~/.codex/skills/create-plan/SKILL.md || true
|
||||
test -f ~/.claude/skills/create-plan/SKILL.md || true
|
||||
test -f ~/.config/opencode/skills/create-plan/SKILL.md || true
|
||||
test -f .cursor/skills/create-plan/SKILL.md || test -f ~/.cursor/skills/create-plan/SKILL.md || true
|
||||
test -x ~/.codex/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x ~/.claude/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x ~/.config/opencode/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x .cursor/skills/reviewer-runtime/run-review.sh || test -x ~/.cursor/skills/reviewer-runtime/run-review.sh || true
|
||||
```
|
||||
|
||||
Verify Superpowers dependencies exist in your agent skills root:
|
||||
@@ -98,6 +117,8 @@ Verify Superpowers dependencies exist in your agent skills root:
|
||||
- Commits `.gitignore` update locally when added.
|
||||
- Asks which reviewer CLI and model to use (or accepts `skip` for no review).
|
||||
- Iteratively reviews the plan with the chosen reviewer (max 5 rounds) before generating files.
|
||||
- Runs reviewer commands through `reviewer-runtime/run-review.sh` when available, with fallback to direct synchronous execution only if the helper is missing or not executable.
|
||||
- Captures reviewer stderr and helper status logs for diagnostics and retains them on failed, empty-output, or operator-decision review rounds.
|
||||
- Produces:
|
||||
- `original-plan.md`
|
||||
- `final-transcript.md`
|
||||
@@ -110,11 +131,38 @@ Verify Superpowers dependencies exist in your agent skills root:
|
||||
After the plan is created (design + milestones + stories), the skill sends it to a second model for review:
|
||||
|
||||
1. **Configure** — user picks a reviewer CLI (`codex`, `claude`, `cursor`) and model, or skips
|
||||
2. **Submit** — plan is written to a temp file and sent to the reviewer in read-only/ask mode
|
||||
3. **Feedback** — reviewer evaluates correctness, risks, missing steps, alternatives, security
|
||||
4. **Revise** — the planning agent addresses each issue and re-submits
|
||||
5. **Repeat** — up to 5 rounds until the reviewer returns `VERDICT: APPROVED`
|
||||
6. **Finalize** — approved plan is used to generate the plan file package
|
||||
2. **Prepare** — plan payload and a bash reviewer command script are written to temp files
|
||||
3. **Run** — the command script is executed through `reviewer-runtime/run-review.sh` when installed
|
||||
4. **Feedback** — reviewer evaluates correctness, risks, missing steps, alternatives, security
|
||||
5. **Revise** — the planning agent addresses each issue and re-submits
|
||||
6. **Repeat** — up to 5 rounds until the reviewer returns `VERDICT: APPROVED`
|
||||
7. **Finalize** — approved plan is used to generate the plan file package
|
||||
|
||||
### Runtime Artifacts
|
||||
|
||||
The review flow may create these temp artifacts:
|
||||
|
||||
- `/tmp/plan-<id>.md` — plan payload
|
||||
- `/tmp/plan-review-<id>.md` — normalized review text
|
||||
- `/tmp/plan-review-<id>.json` — raw Cursor JSON output
|
||||
- `/tmp/plan-review-<id>.stderr` — reviewer stderr
|
||||
- `/tmp/plan-review-<id>.status` — helper heartbeat/status log
|
||||
- `/tmp/plan-review-<id>.runner.out` — helper-managed stdout from the reviewer command process
|
||||
- `/tmp/plan-review-<id>.sh` — reviewer command script
|
||||
|
||||
Status log lines use this format:
|
||||
|
||||
```text
|
||||
ts=<ISO-8601> level=<info|warn|error> state=<running-silent|running-active|stall-warning|completed|completed-empty-output|failed|needs-operator-decision> elapsed_s=<int> pid=<int> stdout_bytes=<int> stderr_bytes=<int> note="<short message>"
|
||||
```
|
||||
|
||||
`stall-warning` is a heartbeat/status-log state only. It is not a terminal review result.
|
||||
|
||||
### Failure Handling
|
||||
|
||||
- `completed-empty-output` means the reviewer exited without producing review text; surface `.stderr` and `.status`, then retry only after diagnosing the cause.
|
||||
- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to keep waiting, abort, or retry with different parameters.
|
||||
- Successful rounds clean up temp artifacts. Failed, empty-output, and operator-decision rounds should retain `.stderr`, `.status`, and `.runner.out` until diagnosed.
|
||||
|
||||
### Supported Reviewer CLIs
|
||||
|
||||
@@ -124,6 +172,12 @@ After the plan is created (design + milestones + stories), the skill sends it to
|
||||
| `claude` | `claude -p --model <model> --allowedTools Read` | No (fresh call each round) | `--allowedTools Read` |
|
||||
| `cursor` | `cursor-agent -p --mode=ask --model <model> --trust --output-format json` | Yes (`--resume <id>`) | `--mode=ask` |
|
||||
|
||||
For all three CLIs, the preferred execution path is:
|
||||
|
||||
1. write the reviewer command to a bash script
|
||||
2. run that script through `reviewer-runtime/run-review.sh`
|
||||
3. fall back to direct synchronous execution only if the helper is missing or not executable
|
||||
|
||||
## Template Guardrails
|
||||
|
||||
All plan templates now include guardrail sections that enforce:
|
||||
|
||||
@@ -20,6 +20,11 @@ Execute an existing plan (created by `create-plan`) in an isolated git worktree,
|
||||
- For Codex, native skill discovery must be configured:
|
||||
- `~/.agents/skills/superpowers -> ~/.codex/superpowers/skills`
|
||||
- For Cursor, skills must be installed under `.cursor/skills/` (repo-local) or `~/.cursor/skills/` (global)
|
||||
- Shared reviewer runtime must be installed beside agent skills when using reviewer CLIs:
|
||||
- Codex: `~/.codex/skills/reviewer-runtime/run-review.sh`
|
||||
- Claude Code: `~/.claude/skills/reviewer-runtime/run-review.sh`
|
||||
- OpenCode: `~/.config/opencode/skills/reviewer-runtime/run-review.sh`
|
||||
- Cursor: `.cursor/skills/reviewer-runtime/run-review.sh` or `~/.cursor/skills/reviewer-runtime/run-review.sh`
|
||||
|
||||
If dependencies are missing, stop and return:
|
||||
|
||||
@@ -46,6 +51,8 @@ The reviewer CLI is independent of which agent is running the implementation —
|
||||
```bash
|
||||
mkdir -p ~/.codex/skills/implement-plan
|
||||
cp -R skills/implement-plan/codex/* ~/.codex/skills/implement-plan/
|
||||
mkdir -p ~/.codex/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.codex/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
@@ -53,6 +60,8 @@ cp -R skills/implement-plan/codex/* ~/.codex/skills/implement-plan/
|
||||
```bash
|
||||
mkdir -p ~/.claude/skills/implement-plan
|
||||
cp -R skills/implement-plan/claude-code/* ~/.claude/skills/implement-plan/
|
||||
mkdir -p ~/.claude/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.claude/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### OpenCode
|
||||
@@ -60,6 +69,8 @@ cp -R skills/implement-plan/claude-code/* ~/.claude/skills/implement-plan/
|
||||
```bash
|
||||
mkdir -p ~/.config/opencode/skills/implement-plan
|
||||
cp -R skills/implement-plan/opencode/* ~/.config/opencode/skills/implement-plan/
|
||||
mkdir -p ~/.config/opencode/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.config/opencode/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
### Cursor
|
||||
@@ -69,6 +80,8 @@ Copy into the repo-local `.cursor/skills/` directory (where the Cursor Agent CLI
|
||||
```bash
|
||||
mkdir -p .cursor/skills/implement-plan
|
||||
cp -R skills/implement-plan/cursor/* .cursor/skills/implement-plan/
|
||||
mkdir -p .cursor/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* .cursor/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
Or install globally (loaded via `~/.cursor/skills/`):
|
||||
@@ -76,6 +89,8 @@ Or install globally (loaded via `~/.cursor/skills/`):
|
||||
```bash
|
||||
mkdir -p ~/.cursor/skills/implement-plan
|
||||
cp -R skills/implement-plan/cursor/* ~/.cursor/skills/implement-plan/
|
||||
mkdir -p ~/.cursor/skills/reviewer-runtime
|
||||
cp -R skills/reviewer-runtime/* ~/.cursor/skills/reviewer-runtime/
|
||||
```
|
||||
|
||||
## Verify Installation
|
||||
@@ -85,6 +100,10 @@ test -f ~/.codex/skills/implement-plan/SKILL.md || true
|
||||
test -f ~/.claude/skills/implement-plan/SKILL.md || true
|
||||
test -f ~/.config/opencode/skills/implement-plan/SKILL.md || true
|
||||
test -f .cursor/skills/implement-plan/SKILL.md || test -f ~/.cursor/skills/implement-plan/SKILL.md || true
|
||||
test -x ~/.codex/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x ~/.claude/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x ~/.config/opencode/skills/reviewer-runtime/run-review.sh || true
|
||||
test -x .cursor/skills/reviewer-runtime/run-review.sh || test -x ~/.cursor/skills/reviewer-runtime/run-review.sh || true
|
||||
```
|
||||
|
||||
Verify Superpowers execution dependencies exist in your agent skills root:
|
||||
@@ -113,6 +132,8 @@ Verify Superpowers execution dependencies exist in your agent skills root:
|
||||
- Executes milestones one-by-one, tracking stories in `story-tracker.md`.
|
||||
- Runs lint/typecheck/tests as a gate before each milestone review.
|
||||
- Sends each milestone to a reviewer CLI for approval (max rounds configurable, default 10).
|
||||
- Runs reviewer commands through `reviewer-runtime/run-review.sh` when available, with fallback to direct synchronous execution only if the helper is missing or not executable.
|
||||
- Captures reviewer stderr and helper status logs for diagnostics and retains them on failed, empty-output, or operator-decision review rounds.
|
||||
- Commits each milestone locally only after reviewer approval (does not push).
|
||||
- After all milestones approved, merges worktree branch to parent and deletes worktree.
|
||||
- Supports resume: detects existing worktree and `in-dev`/`completed` stories.
|
||||
@@ -122,11 +143,38 @@ Verify Superpowers execution dependencies exist in your agent skills root:
|
||||
After each milestone is implemented and verified, the skill sends it to a second model for review:
|
||||
|
||||
1. **Configure** — user picks a reviewer CLI (`codex`, `claude`, `cursor`) and model, or skips
|
||||
2. **Submit** — milestone spec, acceptance criteria, git diff, and verification output are written to a temp file and sent to the reviewer in read-only/ask mode
|
||||
3. **Feedback** — reviewer evaluates correctness, acceptance criteria, code quality, test coverage, security
|
||||
4. **Revise** — the implementing agent addresses each issue, re-verifies, and re-submits
|
||||
5. **Repeat** — up to max rounds (default 10) until the reviewer returns `VERDICT: APPROVED`
|
||||
6. **Approve** — milestone is marked approved in `story-tracker.md`
|
||||
2. **Prepare** — milestone payload and a bash reviewer command script are written to temp files
|
||||
3. **Run** — the command script is executed through `reviewer-runtime/run-review.sh` when installed
|
||||
4. **Feedback** — reviewer evaluates correctness, acceptance criteria, code quality, test coverage, security
|
||||
5. **Revise** — the implementing agent addresses each issue, re-verifies, and re-submits
|
||||
6. **Repeat** — up to max rounds (default 10) until the reviewer returns `VERDICT: APPROVED`
|
||||
7. **Approve** — milestone is marked approved in `story-tracker.md`
|
||||
|
||||
### Runtime Artifacts
|
||||
|
||||
The milestone review flow may create these temp artifacts:
|
||||
|
||||
- `/tmp/milestone-<id>.md` — milestone review payload
|
||||
- `/tmp/milestone-review-<id>.md` — normalized review text
|
||||
- `/tmp/milestone-review-<id>.json` — raw Cursor JSON output
|
||||
- `/tmp/milestone-review-<id>.stderr` — reviewer stderr
|
||||
- `/tmp/milestone-review-<id>.status` — helper heartbeat/status log
|
||||
- `/tmp/milestone-review-<id>.runner.out` — helper-managed stdout from the reviewer command process
|
||||
- `/tmp/milestone-review-<id>.sh` — reviewer command script
|
||||
|
||||
Status log lines use this format:
|
||||
|
||||
```text
|
||||
ts=<ISO-8601> level=<info|warn|error> state=<running-silent|running-active|stall-warning|completed|completed-empty-output|failed|needs-operator-decision> elapsed_s=<int> pid=<int> stdout_bytes=<int> stderr_bytes=<int> note="<short message>"
|
||||
```
|
||||
|
||||
`stall-warning` is a heartbeat/status-log state only. It is not a terminal review result.
|
||||
|
||||
### Failure Handling
|
||||
|
||||
- `completed-empty-output` means the reviewer exited without producing review text; surface `.stderr` and `.status`, then retry only after diagnosing the cause.
|
||||
- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to keep waiting, abort, or retry with different parameters.
|
||||
- Successful rounds clean up temp artifacts. Failed, empty-output, and operator-decision rounds should retain `.stderr`, `.status`, and `.runner.out` until diagnosed.
|
||||
|
||||
### Supported Reviewer CLIs
|
||||
|
||||
@@ -136,6 +184,26 @@ After each milestone is implemented and verified, the skill sends it to a second
|
||||
| `claude` | `claude -p --model <model> --allowedTools Read` | No (fresh call each round) | `--allowedTools Read` |
|
||||
| `cursor` | `cursor-agent -p --mode=ask --model <model> --trust --output-format json` | Yes (`--resume <id>`) | `--mode=ask` |
|
||||
|
||||
For all three CLIs, the preferred execution path is:
|
||||
|
||||
1. write the reviewer command to a bash script
|
||||
2. run that script through `reviewer-runtime/run-review.sh`
|
||||
3. fall back to direct synchronous execution only if the helper is missing or not executable
|
||||
|
||||
The helper also supports manual override flags for diagnostics:
|
||||
|
||||
```bash
|
||||
run-review.sh \
|
||||
--command-file <path> \
|
||||
--stdout-file <path> \
|
||||
--stderr-file <path> \
|
||||
--status-file <path> \
|
||||
--poll-seconds 10 \
|
||||
--soft-timeout-seconds 600 \
|
||||
--stall-warning-seconds 300 \
|
||||
--hard-timeout-seconds 1800
|
||||
```
|
||||
|
||||
## Variant Hardening Notes
|
||||
|
||||
### Claude Code
|
||||
|
||||
@@ -71,7 +71,20 @@ Send the plan to the configured reviewer CLI for feedback. Revise and re-submit
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/plan-${REVIEW_ID}.md` and `/tmp/plan-review-${REVIEW_ID}.md`.
|
||||
Use for temp artifacts:
|
||||
- `/tmp/plan-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.json` (Cursor only)
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared reviewer helper from the installed Claude Code skills directory:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.claude/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Plan to Temp File
|
||||
|
||||
@@ -79,6 +92,13 @@ Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -97,7 +117,7 @@ Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APP
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID` for session resume in subsequent rounds.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -113,8 +133,7 @@ claude -p \
|
||||
Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -137,19 +156,41 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/plan-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/plan-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/plan-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/plan-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/plan-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Plan Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -161,6 +202,8 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
- **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace)
|
||||
- **VERDICT: REVISE** → go to Step 5
|
||||
- No clear verdict but positive / no actionable items → treat as approved
|
||||
- Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (5) reached → proceed with warning
|
||||
|
||||
#### Step 5: Revise the Plan
|
||||
@@ -214,8 +257,7 @@ Changes made:
|
||||
Re-review the full plan. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -242,6 +284,8 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
After updating `/tmp/plan-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Present Final Result
|
||||
@@ -259,9 +303,17 @@ Return to Step 4.
|
||||
#### Step 8: Cleanup
|
||||
|
||||
```bash
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.json
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.json \
|
||||
/tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
/tmp/plan-review-${REVIEW_ID}.status \
|
||||
/tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/plan-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 7: Initialize Local Plan Workspace (MANDATORY)
|
||||
|
||||
At project root:
|
||||
|
||||
@@ -94,7 +94,20 @@ Send the plan to the configured reviewer CLI for feedback. Revise and re-submit
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/plan-${REVIEW_ID}.md` and `/tmp/plan-review-${REVIEW_ID}.md`.
|
||||
Use for temp artifacts:
|
||||
- `/tmp/plan-${REVIEW_ID}.md` - plan payload
|
||||
- `/tmp/plan-review-${REVIEW_ID}.md` - normalized review text presented to the user
|
||||
- `/tmp/plan-review-${REVIEW_ID}.json` - raw Cursor JSON (only for `cursor`)
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr` - reviewer stderr
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status` - helper heartbeat/status log
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out` - helper-managed stdout from the reviewer command process
|
||||
- `/tmp/plan-review-${REVIEW_ID}.sh` - reviewer command script
|
||||
|
||||
Resolve the shared reviewer helper from the installed Codex skills directory:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.codex/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Plan to Temp File
|
||||
|
||||
@@ -102,6 +115,13 @@ Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -120,7 +140,7 @@ Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APP
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID` for session resume in subsequent rounds.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -136,8 +156,7 @@ claude -p \
|
||||
Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -160,19 +179,43 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/plan-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/plan-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/plan-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/plan-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/plan-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
Fallback is allowed only when the helper is missing or not executable.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Plan Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -184,6 +227,8 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
- **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace)
|
||||
- **VERDICT: REVISE** → go to Step 5
|
||||
- No clear verdict but positive / no actionable items → treat as approved
|
||||
- Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (5) reached → proceed with warning
|
||||
|
||||
#### Step 5: Revise the Plan
|
||||
@@ -237,8 +282,7 @@ Changes made:
|
||||
Re-review the full plan. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -265,6 +309,8 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
After updating `/tmp/plan-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Present Final Result
|
||||
@@ -282,9 +328,17 @@ Return to Step 4.
|
||||
#### Step 8: Cleanup
|
||||
|
||||
```bash
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.json
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.json \
|
||||
/tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
/tmp/plan-review-${REVIEW_ID}.status \
|
||||
/tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/plan-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 7: Initialize Local Plan Workspace (MANDATORY)
|
||||
|
||||
At project root:
|
||||
|
||||
@@ -96,7 +96,24 @@ Send the plan to the configured reviewer CLI for feedback. Revise and re-submit
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/plan-${REVIEW_ID}.md` and `/tmp/plan-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all temp artifact paths:
|
||||
- `/tmp/plan-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.json` (Cursor only)
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared reviewer helper from Cursor's installed skills directory:
|
||||
|
||||
```bash
|
||||
if [ -x .cursor/skills/reviewer-runtime/run-review.sh ]; then
|
||||
REVIEWER_RUNTIME=.cursor/skills/reviewer-runtime/run-review.sh
|
||||
else
|
||||
REVIEWER_RUNTIME=~/.cursor/skills/reviewer-runtime/run-review.sh
|
||||
fi
|
||||
```
|
||||
|
||||
#### Step 2: Write Plan to Temp File
|
||||
|
||||
@@ -104,6 +121,13 @@ Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -122,7 +146,7 @@ Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APP
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID` for session resume in subsequent rounds.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -138,8 +162,7 @@ claude -p \
|
||||
Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -162,12 +185,7 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/plan-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/plan-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md
|
||||
```
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/plan-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes.
|
||||
|
||||
Notes on Cursor flags:
|
||||
- `--mode=ask` — read-only mode, no file modifications
|
||||
@@ -175,10 +193,39 @@ Notes on Cursor flags:
|
||||
- `-p` / `--print` — non-interactive mode, output to stdout
|
||||
- `--output-format json` — structured output with `session_id` and `result` fields
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/plan-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/plan-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/plan-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Plan Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -190,6 +237,8 @@ Notes on Cursor flags:
|
||||
- **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace)
|
||||
- **VERDICT: REVISE** → go to Step 5
|
||||
- No clear verdict but positive / no actionable items → treat as approved
|
||||
- Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (5) reached → proceed with warning
|
||||
|
||||
#### Step 5: Revise the Plan
|
||||
@@ -243,8 +292,7 @@ Changes made:
|
||||
Re-review the full plan. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -271,6 +319,8 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
After updating `/tmp/plan-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Present Final Result
|
||||
@@ -288,9 +338,17 @@ Return to Step 4.
|
||||
#### Step 8: Cleanup
|
||||
|
||||
```bash
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.json
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.json \
|
||||
/tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
/tmp/plan-review-${REVIEW_ID}.status \
|
||||
/tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/plan-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 7: Initialize Local Plan Workspace (MANDATORY)
|
||||
|
||||
At project root:
|
||||
|
||||
@@ -88,7 +88,20 @@ Send the plan to the configured reviewer CLI for feedback. Revise and re-submit
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/plan-${REVIEW_ID}.md` and `/tmp/plan-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all temp artifact paths:
|
||||
- `/tmp/plan-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.json` (Cursor only)
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared reviewer helper from the installed OpenCode skills directory:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.config/opencode/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Plan to Temp File
|
||||
|
||||
@@ -96,6 +109,13 @@ Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -114,7 +134,7 @@ Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APP
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID` for session resume in subsequent rounds.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -130,8 +150,7 @@ claude -p \
|
||||
Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -154,19 +173,41 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/plan-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/plan-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/plan-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/plan-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/plan-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/plan-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/plan-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.status`
|
||||
- `/tmp/plan-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Plan Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -178,6 +219,8 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
- **VERDICT: APPROVED** → proceed to Phase 8 (Initialize workspace)
|
||||
- **VERDICT: REVISE** → go to Step 5
|
||||
- No clear verdict but positive / no actionable items → treat as approved
|
||||
- Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (5) reached → proceed with warning
|
||||
|
||||
#### Step 5: Revise the Plan
|
||||
@@ -231,8 +274,7 @@ Changes made:
|
||||
Re-review the full plan. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/plan-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -259,6 +301,8 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
After updating `/tmp/plan-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Present Final Result
|
||||
@@ -276,9 +320,17 @@ Return to Step 4.
|
||||
#### Step 8: Cleanup
|
||||
|
||||
```bash
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.md /tmp/plan-review-${REVIEW_ID}.json
|
||||
rm -f /tmp/plan-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.md \
|
||||
/tmp/plan-review-${REVIEW_ID}.json \
|
||||
/tmp/plan-review-${REVIEW_ID}.stderr \
|
||||
/tmp/plan-review-${REVIEW_ID}.status \
|
||||
/tmp/plan-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/plan-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 8: Initialize Local Plan Workspace (MANDATORY)
|
||||
|
||||
At project root:
|
||||
|
||||
@@ -136,7 +136,20 @@ Do NOT push. After committing:
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/milestone-${REVIEW_ID}.md` and `/tmp/milestone-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all milestone review temp file paths:
|
||||
- `/tmp/milestone-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.json`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared runtime helper path before writing the command script:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.claude/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Review Payload
|
||||
|
||||
@@ -165,6 +178,13 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`:
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -185,7 +205,7 @@ Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID`.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -203,8 +223,7 @@ Evaluate:
|
||||
Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -229,19 +248,43 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/milestone-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/milestone-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/milestone-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/milestone-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
Fallback is allowed only when the helper is missing or not executable.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Milestone Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -249,10 +292,12 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
[Reviewer feedback]
|
||||
```
|
||||
|
||||
3. Check verdict:
|
||||
4. Check verdict:
|
||||
- **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve)
|
||||
- **VERDICT: REVISE** -> go to Step 5
|
||||
- No clear verdict but positive / no actionable items -> treat as approved
|
||||
- Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop)
|
||||
|
||||
#### Step 5: Address Feedback & Re-verify
|
||||
@@ -272,6 +317,8 @@ If a revision contradicts the user's explicit requirements, skip it and note it
|
||||
|
||||
#### Step 6: Re-submit to Reviewer (Rounds 2-N)
|
||||
|
||||
Rewrite `/tmp/milestone-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly.
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
Resume the existing session:
|
||||
@@ -330,20 +377,31 @@ Changes made:
|
||||
Re-review. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
Do not run `jq` extraction until after the helper or fallback execution completes, then extract `/tmp/milestone-review-${REVIEW_ID}.md` from the JSON response.
|
||||
|
||||
After updating `/tmp/milestone-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Cleanup Per Milestone
|
||||
|
||||
```bash
|
||||
rm -f /tmp/milestone-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.json
|
||||
rm -f \
|
||||
/tmp/milestone-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.json \
|
||||
/tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
/tmp/milestone-review-${REVIEW_ID}.status \
|
||||
/tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/milestone-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 6: Completion (REQUIRED SUB-SKILL)
|
||||
|
||||
After all milestones are approved and committed:
|
||||
|
||||
@@ -169,7 +169,20 @@ Do NOT push. After committing:
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/milestone-${REVIEW_ID}.md` and `/tmp/milestone-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all milestone review temp file paths:
|
||||
- `/tmp/milestone-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.json`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared runtime helper path before writing the command script:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.codex/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Review Payload
|
||||
|
||||
@@ -198,6 +211,13 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`:
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -218,7 +238,7 @@ Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID`.
|
||||
Do not try to capture the Codex session ID yet. After the helper or fallback run completes, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -236,8 +256,7 @@ Evaluate:
|
||||
Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -262,19 +281,43 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/milestone-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/milestone-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/milestone-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the final review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/milestone-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
Fallback is allowed only when the helper is missing or not executable.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Milestone Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -282,10 +325,12 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
[Reviewer feedback]
|
||||
```
|
||||
|
||||
3. Check verdict:
|
||||
4. Check verdict:
|
||||
- **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve)
|
||||
- **VERDICT: REVISE** -> go to Step 5
|
||||
- No clear verdict but positive / no actionable items -> treat as approved
|
||||
- Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop)
|
||||
|
||||
#### Step 5: Address Feedback & Re-verify
|
||||
@@ -305,6 +350,8 @@ If a revision contradicts the user's explicit requirements, skip it and note it
|
||||
|
||||
#### Step 6: Re-submit to Reviewer (Rounds 2-N)
|
||||
|
||||
Rewrite `/tmp/milestone-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly.
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
Resume the existing session:
|
||||
@@ -363,20 +410,31 @@ Changes made:
|
||||
Re-review. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
Do not run `jq` extraction until after the helper or fallback execution completes, then extract `/tmp/milestone-review-${REVIEW_ID}.md` from the JSON response.
|
||||
|
||||
After updating `/tmp/milestone-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Cleanup Per Milestone
|
||||
|
||||
```bash
|
||||
rm -f /tmp/milestone-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.json
|
||||
rm -f \
|
||||
/tmp/milestone-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.json \
|
||||
/tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
/tmp/milestone-review-${REVIEW_ID}.status \
|
||||
/tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/milestone-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 6: Completion (REQUIRED SUB-SKILL)
|
||||
|
||||
After all milestones are approved and committed:
|
||||
|
||||
@@ -169,7 +169,24 @@ Do NOT push. After committing:
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/milestone-${REVIEW_ID}.md` and `/tmp/milestone-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all milestone review temp file paths:
|
||||
- `/tmp/milestone-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.json`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared runtime helper path before writing the command script:
|
||||
|
||||
```bash
|
||||
if [ -x .cursor/skills/reviewer-runtime/run-review.sh ]; then
|
||||
REVIEWER_RUNTIME=.cursor/skills/reviewer-runtime/run-review.sh
|
||||
else
|
||||
REVIEWER_RUNTIME=~/.cursor/skills/reviewer-runtime/run-review.sh
|
||||
fi
|
||||
```
|
||||
|
||||
#### Step 2: Write Review Payload
|
||||
|
||||
@@ -198,6 +215,13 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`:
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -218,7 +242,7 @@ Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID`.
|
||||
Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -236,8 +260,7 @@ Evaluate:
|
||||
Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -262,12 +285,7 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/milestone-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/milestone-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes.
|
||||
|
||||
Notes on Cursor flags:
|
||||
- `--mode=ask` — read-only mode, no file modifications
|
||||
@@ -275,10 +293,41 @@ Notes on Cursor flags:
|
||||
- `-p` / `--print` — non-interactive mode, output to stdout
|
||||
- `--output-format json` — structured output with `session_id` and `result` fields
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/milestone-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/milestone-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the session ID and the final review text from the JSON response:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/milestone-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
Fallback is allowed only when the helper is missing or not executable.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Milestone Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -286,10 +335,12 @@ Notes on Cursor flags:
|
||||
[Reviewer feedback]
|
||||
```
|
||||
|
||||
3. Check verdict:
|
||||
4. Check verdict:
|
||||
- **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve)
|
||||
- **VERDICT: REVISE** -> go to Step 5
|
||||
- No clear verdict but positive / no actionable items -> treat as approved
|
||||
- Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop)
|
||||
|
||||
#### Step 5: Address Feedback & Re-verify
|
||||
@@ -309,6 +360,8 @@ If a revision contradicts the user's explicit requirements, skip it and note it
|
||||
|
||||
#### Step 6: Re-submit to Reviewer (Rounds 2-N)
|
||||
|
||||
Rewrite `/tmp/milestone-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly.
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
Resume the existing session:
|
||||
@@ -367,20 +420,31 @@ Changes made:
|
||||
Re-review. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
Do not run `jq` extraction until after the helper or fallback execution completes, then extract `/tmp/milestone-review-${REVIEW_ID}.md` from the JSON response.
|
||||
|
||||
After updating `/tmp/milestone-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Cleanup Per Milestone
|
||||
|
||||
```bash
|
||||
rm -f /tmp/milestone-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.json
|
||||
rm -f \
|
||||
/tmp/milestone-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.json \
|
||||
/tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
/tmp/milestone-review-${REVIEW_ID}.status \
|
||||
/tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/milestone-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 6: Completion (REQUIRED SUB-SKILL)
|
||||
|
||||
After all milestones are approved and committed:
|
||||
|
||||
@@ -154,7 +154,20 @@ Do NOT push. After committing:
|
||||
REVIEW_ID=$(uuidgen | tr '[:upper:]' '[:lower:]' | head -c 8)
|
||||
```
|
||||
|
||||
Use for all temp file paths: `/tmp/milestone-${REVIEW_ID}.md` and `/tmp/milestone-review-${REVIEW_ID}.md`.
|
||||
Use `REVIEW_ID` for all milestone review temp file paths:
|
||||
- `/tmp/milestone-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.json`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.sh`
|
||||
|
||||
Resolve the shared runtime helper path before writing the command script:
|
||||
|
||||
```bash
|
||||
REVIEWER_RUNTIME=~/.config/opencode/skills/reviewer-runtime/run-review.sh
|
||||
```
|
||||
|
||||
#### Step 2: Write Review Payload
|
||||
|
||||
@@ -183,6 +196,13 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`:
|
||||
|
||||
#### Step 3: Submit to Reviewer (Round 1)
|
||||
|
||||
Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
```bash
|
||||
@@ -203,7 +223,7 @@ Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE"
|
||||
```
|
||||
|
||||
Capture the Codex session ID from output (line `session id: <uuid>`). Store as `CODEX_SESSION_ID`.
|
||||
Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: <uuid>`), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds.
|
||||
|
||||
**If `REVIEWER_CLI` is `claude`:**
|
||||
|
||||
@@ -221,8 +241,7 @@ Evaluate:
|
||||
Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED
|
||||
If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
--model ${REVIEWER_MODEL} \
|
||||
--allowedTools Read \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.md
|
||||
--allowedTools Read
|
||||
```
|
||||
|
||||
**If `REVIEWER_CLI` is `cursor`:**
|
||||
@@ -247,19 +266,43 @@ If changes are needed, end with exactly: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
```
|
||||
|
||||
Extract session ID and review text (requires `jq`):
|
||||
For `cursor`, the command script writes raw JSON to `/tmp/milestone-review-${REVIEW_ID}.json`. Do not run `jq` extraction until after the helper or fallback execution completes. If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
|
||||
Run the command script through the shared helper when available:
|
||||
|
||||
```bash
|
||||
if [ -x "$REVIEWER_RUNTIME" ]; then
|
||||
"$REVIEWER_RUNTIME" \
|
||||
--command-file /tmp/milestone-review-${REVIEW_ID}.sh \
|
||||
--stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
--stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
--status-file /tmp/milestone-review-${REVIEW_ID}.status
|
||||
else
|
||||
echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2
|
||||
bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr
|
||||
fi
|
||||
```
|
||||
|
||||
After the command completes:
|
||||
- If `REVIEWER_CLI=cursor`, extract the session ID and the final review text from the JSON response:
|
||||
|
||||
```bash
|
||||
CURSOR_SESSION_ID=$(jq -r '.session_id' /tmp/milestone-review-${REVIEW_ID}.json)
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivalent.
|
||||
- If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing.
|
||||
|
||||
Fallback is allowed only when the helper is missing or not executable.
|
||||
|
||||
#### Step 4: Read Review & Check Verdict
|
||||
|
||||
1. Read `/tmp/milestone-review-${REVIEW_ID}.md`
|
||||
2. Present review to the user:
|
||||
2. If the review failed, produced empty output, or reached helper timeout, also read:
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.stderr`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.status`
|
||||
- `/tmp/milestone-review-${REVIEW_ID}.runner.out`
|
||||
3. Present review to the user:
|
||||
|
||||
```
|
||||
## Milestone Review — Round N (reviewer: ${REVIEWER_CLI} / ${REVIEWER_MODEL})
|
||||
@@ -267,10 +310,12 @@ If `jq` is not installed, inform the user: `brew install jq` (macOS) or equivale
|
||||
[Reviewer feedback]
|
||||
```
|
||||
|
||||
3. Check verdict:
|
||||
4. Check verdict:
|
||||
- **VERDICT: APPROVED** -> proceed to Phase 5 Step 6 (commit & approve)
|
||||
- **VERDICT: REVISE** -> go to Step 5
|
||||
- No clear verdict but positive / no actionable items -> treat as approved
|
||||
- Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry
|
||||
- Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters
|
||||
- Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop)
|
||||
|
||||
#### Step 5: Address Feedback & Re-verify
|
||||
@@ -290,6 +335,8 @@ If a revision contradicts the user's explicit requirements, skip it and note it
|
||||
|
||||
#### Step 6: Re-submit to Reviewer (Rounds 2-N)
|
||||
|
||||
Rewrite `/tmp/milestone-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly.
|
||||
|
||||
**If `REVIEWER_CLI` is `codex`:**
|
||||
|
||||
Resume the existing session:
|
||||
@@ -348,20 +395,31 @@ Changes made:
|
||||
Re-review. If solid, end with: VERDICT: APPROVED
|
||||
If more changes needed, end with: VERDICT: REVISE" \
|
||||
> /tmp/milestone-review-${REVIEW_ID}.json
|
||||
|
||||
jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review-${REVIEW_ID}.md
|
||||
```
|
||||
|
||||
If resume fails, fall back to fresh `cursor-agent -p` with context about prior rounds.
|
||||
|
||||
Do not run `jq` extraction until after the helper or fallback execution completes, then extract `/tmp/milestone-review-${REVIEW_ID}.md` from the JSON response.
|
||||
|
||||
After updating `/tmp/milestone-review-${REVIEW_ID}.sh`, run the same helper/fallback flow from Round 1.
|
||||
|
||||
Return to Step 4.
|
||||
|
||||
#### Step 7: Cleanup Per Milestone
|
||||
|
||||
```bash
|
||||
rm -f /tmp/milestone-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.md /tmp/milestone-review-${REVIEW_ID}.json
|
||||
rm -f \
|
||||
/tmp/milestone-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.md \
|
||||
/tmp/milestone-review-${REVIEW_ID}.json \
|
||||
/tmp/milestone-review-${REVIEW_ID}.stderr \
|
||||
/tmp/milestone-review-${REVIEW_ID}.status \
|
||||
/tmp/milestone-review-${REVIEW_ID}.runner.out \
|
||||
/tmp/milestone-review-${REVIEW_ID}.sh
|
||||
```
|
||||
|
||||
If the round failed, produced empty output, or reached operator-decision timeout, keep `.stderr`, `.status`, and `.runner.out` until the issue is diagnosed instead of deleting them immediately.
|
||||
|
||||
### Phase 7: Completion (REQUIRED SUB-SKILL)
|
||||
|
||||
After all milestones are approved and committed:
|
||||
|
||||
305
skills/reviewer-runtime/run-review.sh
Executable file
305
skills/reviewer-runtime/run-review.sh
Executable file
@@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env bash
# Runs a reviewer command script in the background, polls it, and records
# heartbeat and terminal states in a status file.
set -euo pipefail

# Built-in defaults (seconds) for the tunable intervals. They are never
# reassigned, so they are marked readonly.
readonly DEFAULT_POLL_SECONDS=10
readonly DEFAULT_SOFT_TIMEOUT_SECONDS=600
readonly DEFAULT_STALL_WARNING_SECONDS=300
readonly DEFAULT_HARD_TIMEOUT_SECONDS=1800

# Exit codes reserved for helper-specific terminal states.
readonly EXIT_COMPLETED_EMPTY_OUTPUT=80
readonly EXIT_NEEDS_OPERATOR_DECISION=81

# Required paths; filled in by parse_args.
COMMAND_FILE=""
STDOUT_FILE=""
STDERR_FILE=""
STATUS_FILE=""

# Effective intervals; parse_args may override them from the command line.
POLL_SECONDS=$DEFAULT_POLL_SECONDS
SOFT_TIMEOUT_SECONDS=$DEFAULT_SOFT_TIMEOUT_SECONDS
STALL_WARNING_SECONDS=$DEFAULT_STALL_WARNING_SECONDS
HARD_TIMEOUT_SECONDS=$DEFAULT_HARD_TIMEOUT_SECONDS

# Runtime state shared by launch_child, the signal handlers and main.
CHILD_PID=""
USE_GROUP_KILL=0
INTERRUPTED=0
|
||||
|
||||
# Print the command-line synopsis to stdout.
usage() {
  printf '%s\n' \
    'Usage:' \
    '  run-review.sh \' \
    '    --command-file <path> \' \
    '    --stdout-file <path> \' \
    '    --stderr-file <path> \' \
    '    --status-file <path> \' \
    '    [--poll-seconds <int>] \' \
    '    [--soft-timeout-seconds <int>] \' \
    '    [--stall-warning-seconds <int>] \' \
    '    [--hard-timeout-seconds <int>]'
}
|
||||
|
||||
# Report a command-line error on stderr, followed by the usage text, and
# terminate with status 2.
# Arguments: $* - error message
fail_usage() {
  {
    printf 'Error: %s\n' "$*"
    usage
  } >&2
  exit 2
}
|
||||
|
||||
# Abort via fail_usage unless the value consists solely of decimal digits.
# Arguments: $1 - option name used in the error message
#            $2 - value to validate
require_integer() {
  local label=$1
  local candidate=$2
  case "$candidate" in
    '' | *[!0-9]*)
      fail_usage "$label must be an integer"
      ;;
  esac
}
|
||||
|
||||
# Make a note safe to embed in a double-quoted, single-line status field:
# newlines become spaces and double quotes become single quotes.
# Arguments: $1 - raw note text
# Outputs:   sanitized note on stdout (no trailing newline)
escape_note() {
  printf '%s' "$1" | tr '\n"' " '"
}
|
||||
|
||||
# Print the current UTC time as YYYY-MM-DDTHH:MM:SSZ.
iso_timestamp() {
  local utc_format='+%Y-%m-%dT%H:%M:%SZ'
  date -u "$utc_format"
}
|
||||
|
||||
# Print the number of whole seconds since START_TIME (epoch seconds).
# Globals: START_TIME (read)
elapsed_seconds() {
  printf '%s' "$(($(date +%s) - START_TIME))"
}
|
||||
|
||||
# Print the size of a regular file in bytes, or 0 when the path is not a
# regular file.
# Arguments: $1 - path to measure
file_bytes() {
  local target=$1
  if [[ ! -f "$target" ]]; then
    printf '0'
    return 0
  fi
  # wc pads its output on some platforms, so strip all whitespace.
  wc -c <"$target" | tr -d '[:space:]'
}
|
||||
|
||||
# Emit one key=value status record to stdout and append it to STATUS_FILE.
# Globals:   CHILD_PID, STDOUT_FILE, STDERR_FILE, STATUS_FILE (read)
# Arguments: $1 - level, $2 - state, $3 - free-form note
append_status() {
  local level=$1 state=$2 note=$3
  local elapsed out_bytes err_bytes stamp safe_note

  elapsed=$(elapsed_seconds)
  out_bytes=$(file_bytes "$STDOUT_FILE")
  err_bytes=$(file_bytes "$STDERR_FILE")
  stamp=$(iso_timestamp)
  safe_note=$(escape_note "$note")

  # pid falls back to 0 while no child has been launched yet.
  printf 'ts=%s level=%s state=%s elapsed_s=%s pid=%s stdout_bytes=%s stderr_bytes=%s note="%s"\n' \
    "$stamp" "$level" "$state" "$elapsed" "${CHILD_PID:-0}" \
    "$out_bytes" "$err_bytes" "$safe_note" \
    | tee -a "$STATUS_FILE"
}
|
||||
|
||||
# Create the directory that will contain the given path, including any
# missing ancestors.
# Arguments: $1 - file path whose parent directory must exist
ensure_parent_dir() {
  local parent
  parent=$(dirname "$1")
  mkdir -p "$parent"
}
|
||||
|
||||
# Succeeds while the reviewer child, or (when group kill is active) any
# member of its process group, still exists.
# Globals: CHILD_PID, USE_GROUP_KILL (read)
_reviewer_child_alive() {
  if kill -0 "$CHILD_PID" 2>/dev/null; then
    return 0
  fi
  # The leader may be gone while descendants in its group keep running.
  [[ "$USE_GROUP_KILL" -eq 1 ]] && kill -0 -- "-$CHILD_PID" 2>/dev/null
}

# Deliver a signal to the reviewer child; prefers the whole process group
# when group kill is active and falls back to the single PID.
# Arguments: $1 - signal name without the SIG prefix (e.g. TERM)
_signal_reviewer_child() {
  local signal=$1
  if [[ "$USE_GROUP_KILL" -eq 1 ]]; then
    kill "-$signal" -- "-$CHILD_PID" 2>/dev/null \
      || kill "-$signal" "$CHILD_PID" 2>/dev/null \
      || true
  else
    # Best effort: the child may exit between the probe and the signal.
    kill "-$signal" "$CHILD_PID" 2>/dev/null || true
  fi
}

# Terminate the reviewer child: send TERM, wait one second, then send KILL
# to whatever is still running. A no-op when no child was launched or
# nothing is left to signal.
# Globals: CHILD_PID, USE_GROUP_KILL (read)
# Returns: 0 always
kill_child_process_group() {
  if [[ -z "$CHILD_PID" ]]; then
    return 0
  fi

  if ! _reviewer_child_alive; then
    return 0
  fi

  _signal_reviewer_child TERM

  sleep 1

  # Probing the group (not only the leader) escalates to KILL for
  # descendants that ignored TERM after the leader exited.
  if _reviewer_child_alive; then
    _signal_reviewer_child KILL
  fi
}
|
||||
|
||||
# Trap handler for INT/TERM: mark the run as interrupted, log a failed
# status, stop the reviewer child and exit with status 130.
# Globals:   INTERRUPTED (written)
# Arguments: $1 - signal name without the SIG prefix
handle_signal() {
  local sig=$1
  local message="received SIG${sig}; terminating reviewer child"

  # Set first so the EXIT trap does not kill the child a second time.
  INTERRUPTED=1
  append_status error failed "$message"
  kill_child_process_group
  exit 130
}
|
||||
|
||||
# Parse and validate command-line options into the global settings.
# Globals:   COMMAND_FILE, STDOUT_FILE, STDERR_FILE, STATUS_FILE,
#            POLL_SECONDS, SOFT_TIMEOUT_SECONDS, STALL_WARNING_SECONDS,
#            HARD_TIMEOUT_SECONDS (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing usage for --help/-h; 2 (via fail_usage) on an
#            unknown flag, a flag missing its value, or invalid values
parse_args() {
  local flag value

  while [[ $# -gt 0 ]]; do
    flag=$1
    case "$flag" in
      --help | -h)
        usage
        exit 0
        ;;
      --command-file | --stdout-file | --stderr-file | --status-file \
        | --poll-seconds | --soft-timeout-seconds \
        | --stall-warning-seconds | --hard-timeout-seconds)
        # Without this check a trailing flag makes `shift 2` fail, which
        # under `set -e` ends the script silently with no usage message.
        [[ $# -ge 2 ]] || fail_usage "$flag requires a value"
        value=$2
        shift 2
        ;;
      *)
        fail_usage "unknown argument: $flag"
        ;;
    esac

    case "$flag" in
      --command-file) COMMAND_FILE=$value ;;
      --stdout-file) STDOUT_FILE=$value ;;
      --stderr-file) STDERR_FILE=$value ;;
      --status-file) STATUS_FILE=$value ;;
      --poll-seconds) POLL_SECONDS=$value ;;
      --soft-timeout-seconds) SOFT_TIMEOUT_SECONDS=$value ;;
      --stall-warning-seconds) STALL_WARNING_SECONDS=$value ;;
      --hard-timeout-seconds) HARD_TIMEOUT_SECONDS=$value ;;
    esac
  done

  [[ -n "$COMMAND_FILE" ]] || fail_usage "--command-file is required"
  [[ -n "$STDOUT_FILE" ]] || fail_usage "--stdout-file is required"
  [[ -n "$STDERR_FILE" ]] || fail_usage "--stderr-file is required"
  [[ -n "$STATUS_FILE" ]] || fail_usage "--status-file is required"

  require_integer "poll-seconds" "$POLL_SECONDS"
  require_integer "soft-timeout-seconds" "$SOFT_TIMEOUT_SECONDS"
  require_integer "stall-warning-seconds" "$STALL_WARNING_SECONDS"
  require_integer "hard-timeout-seconds" "$HARD_TIMEOUT_SECONDS"

  [[ "$POLL_SECONDS" -gt 0 ]] || fail_usage "poll-seconds must be > 0"
  [[ "$SOFT_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "soft-timeout-seconds must be > 0"
  [[ "$STALL_WARNING_SECONDS" -gt 0 ]] || fail_usage "stall-warning-seconds must be > 0"
  [[ "$HARD_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "hard-timeout-seconds must be > 0"
  [[ "$SOFT_TIMEOUT_SECONDS" -le "$HARD_TIMEOUT_SECONDS" ]] || fail_usage "soft-timeout-seconds must be <= hard-timeout-seconds"
  [[ "$STALL_WARNING_SECONDS" -le "$HARD_TIMEOUT_SECONDS" ]] || fail_usage "stall-warning-seconds must be <= hard-timeout-seconds"

  [[ -r "$COMMAND_FILE" ]] || fail_usage "command file is not readable: $COMMAND_FILE"
}
|
||||
|
||||
# Start the command script in the background with stdout/stderr redirected
# to the capture files. When `setsid` is available the script is started
# through it and group kill is enabled.
# Globals: COMMAND_FILE, STDOUT_FILE, STDERR_FILE (read);
#          USE_GROUP_KILL, CHILD_PID (written)
launch_child() {
  local -a runner=(bash "$COMMAND_FILE")

  USE_GROUP_KILL=0
  if command -v setsid >/dev/null 2>&1; then
    runner=(setsid "${runner[@]}")
    USE_GROUP_KILL=1
  fi

  "${runner[@]}" >"$STDOUT_FILE" 2>"$STDERR_FILE" &
  CHILD_PID=$!
}
|
||||
|
||||
# Entry point: launch the reviewer command, poll it until it exits or the
# hard timeout elapses, and log a status record on every poll.
# Arguments: the script's command-line arguments (see usage)
# Exits:     0 when the child exits 0 with non-empty stdout;
#            EXIT_COMPLETED_EMPTY_OUTPUT when it exits 0 with empty stdout;
#            EXIT_NEEDS_OPERATOR_DECISION when the hard timeout is reached;
#            otherwise the child's own exit code
main() {
  parse_args "$@"

  ensure_parent_dir "$STDOUT_FILE"
  ensure_parent_dir "$STDERR_FILE"
  ensure_parent_dir "$STATUS_FILE"
  # Truncate the capture files so sizes reflect this run only.
  : >"$STDOUT_FILE"
  : >"$STDERR_FILE"
  : >"$STATUS_FILE"

  START_TIME=$(date +%s)
  export START_TIME

  trap 'handle_signal INT' INT
  trap 'handle_signal TERM' TERM
  # Any other exit path still cleans up the child; handle_signal does so
  # itself and sets INTERRUPTED to avoid a second kill here.
  trap 'if [[ "$INTERRUPTED" -eq 0 ]]; then kill_child_process_group; fi' EXIT

  launch_child
  append_status info running-silent "reviewer child launched"

  local last_stdout_bytes=0
  local last_stderr_bytes=0
  local last_output_change_time=$START_TIME
  local soft_timeout_logged=0
  local now elapsed stdout_bytes stderr_bytes note state level silent_for

  while kill -0 "$CHILD_PID" 2>/dev/null; do
    sleep "$POLL_SECONDS"

    # The child may have finished while we slept. Leave the loop so that a
    # completed review is not logged as still running or, at the deadline,
    # misreported as a hard timeout.
    if ! kill -0 "$CHILD_PID" 2>/dev/null; then
      break
    fi

    now=$(date +%s)
    elapsed=$((now - START_TIME))
    stdout_bytes=$(file_bytes "$STDOUT_FILE")
    stderr_bytes=$(file_bytes "$STDERR_FILE")

    if [[ "$stdout_bytes" -ne "$last_stdout_bytes" || "$stderr_bytes" -ne "$last_stderr_bytes" ]]; then
      last_output_change_time=$now
      state=running-active
      level=info
      note="reviewer output changed"
    else
      silent_for=$((now - last_output_change_time))
      if [[ "$silent_for" -ge "$STALL_WARNING_SECONDS" ]]; then
        state=stall-warning
        level=warn
        note="no output growth for ${silent_for}s; process still alive"
      else
        state=running-silent
        level=info
        note="reviewer process alive; waiting for output"
      fi
    fi

    # The soft timeout is informational and is noted only once.
    if [[ "$elapsed" -ge "$SOFT_TIMEOUT_SECONDS" && "$soft_timeout_logged" -eq 0 ]]; then
      note="$note; soft timeout reached, continuing while reviewer is alive"
      soft_timeout_logged=1
    fi

    append_status "$level" "$state" "$note"
    last_stdout_bytes=$stdout_bytes
    last_stderr_bytes=$stderr_bytes

    if [[ "$elapsed" -ge "$HARD_TIMEOUT_SECONDS" ]]; then
      append_status error needs-operator-decision "hard timeout reached; terminating reviewer child for operator intervention"
      kill_child_process_group
      trap - EXIT
      exit "$EXIT_NEEDS_OPERATOR_DECISION"
    fi
  done

  # Collect the child's exit code without letting `set -e` abort on failure.
  local child_exit_code=0
  wait "$CHILD_PID" || child_exit_code=$?
  trap - EXIT

  local final_stdout_bytes final_stderr_bytes
  final_stdout_bytes=$(file_bytes "$STDOUT_FILE")
  final_stderr_bytes=$(file_bytes "$STDERR_FILE")

  if [[ "$child_exit_code" -eq 0 ]]; then
    if [[ "$final_stdout_bytes" -gt 0 ]]; then
      append_status info completed "reviewer completed successfully"
      exit 0
    fi

    append_status error completed-empty-output "reviewer exited successfully with empty stdout"
    exit "$EXIT_COMPLETED_EMPTY_OUTPUT"
  fi

  append_status error failed "reviewer exited with code $child_exit_code"
  exit "$child_exit_code"
}
|
||||
|
||||
main "$@"
|
||||
263
skills/reviewer-runtime/tests/smoke-test.sh
Normal file
263
skills/reviewer-runtime/tests/smoke-test.sh
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env bash
# Smoke tests for the reviewer runtime helper (run-review.sh).
set -euo pipefail

# Absolute directory containing this test script.
SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
SCRIPT_DIR=$(cd "$SCRIPT_DIR" && pwd)

# The helper under test sits one directory above this script.
HELPER_PATH=$(cd "$SCRIPT_DIR/.." && pwd)
HELPER_PATH+=/run-review.sh
|
||||
|
||||
# Print a failure message to stderr and abort the test run with status 1.
# Arguments: $* - failure description
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
|
||||
|
||||
# Assert that a file contains a fixed string; on mismatch, dump the first
# 200 lines of the file to stderr and fail the test run.
# Arguments: $1 - file to search
#            $2 - literal (non-regex) pattern
assert_file_contains() {
  local file=$1
  local pattern=$2

  # `--` keeps patterns that start with '-' from being parsed as options.
  if grep -qF -- "$pattern" "$file"; then
    return 0
  fi

  {
    printf 'Expected pattern not found: %s\n' "$pattern"
    printf -- '--- %s ---\n' "$file"
    # Best effort: the file may be missing, which is itself the failure.
    sed -n '1,200p' "$file" || true
  } >&2
  fail "missing pattern in $file"
}
|
||||
|
||||
# Fail the test run unless the observed exit code equals the expected one.
# Arguments: $1 - actual exit code
#            $2 - expected exit code
assert_exit_code() {
  local actual=$1
  local expected=$2
  if [[ "$actual" -eq "$expected" ]]; then
    return 0
  fi
  fail "expected exit code $expected, got $actual"
}
|
||||
|
||||
# Fail the test run when the observed exit code is zero.
# Arguments: $1 - actual exit code
assert_nonzero_exit() {
  local actual=$1
  if [[ "$actual" -ne 0 ]]; then
    return 0
  fi
  fail "expected non-zero exit code"
}
|
||||
|
||||
# Write an executable bash script consisting of a strict-mode preamble
# followed by the given body.
# Arguments: $1 - destination path
#            $2 - script body, inserted verbatim
make_command() {
  local file=$1
  local body=$2
  printf '%s\n' \
    '#!/usr/bin/env bash' \
    'set -euo pipefail' \
    "$body" >"$file"
  chmod +x "$file"
}
|
||||
|
||||
# Invoke the helper with the four required file options plus any extra
# options, and return the helper's exit code.
# Globals:   HELPER_PATH (read)
# Arguments: $1 - command file, $2 - stdout file, $3 - stderr file,
#            $4 - status file, remaining - extra helper options
run_helper() {
  local -a helper_args=(
    --command-file "$1"
    --stdout-file "$2"
    --stderr-file "$3"
    --status-file "$4"
  )
  shift 4
  helper_args+=("$@")

  local helper_status
  # errexit is suspended so a failing helper is reported, not fatal.
  set +e
  "$HELPER_PATH" "${helper_args[@]}"
  helper_status=$?
  set -e

  return "$helper_status"
}
|
||||
|
||||
# Scenario: the reviewer prints its verdict after a short delay; the helper
# must exit 0, capture the verdict and log a completed state.
# Arguments: $1 - scratch directory for this run's files
test_delayed_success() {
  local dir=$1
  local prefix=$dir/delayed-success
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status

  make_command "$command_file" '
sleep 2
printf "VERDICT: APPROVED\n"
'

  local exit_code=0
  run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \
    --poll-seconds 1 \
    --soft-timeout-seconds 5 \
    --stall-warning-seconds 3 \
    --hard-timeout-seconds 10 || exit_code=$?

  assert_exit_code "$exit_code" 0
  assert_file_contains "$stdout_file" "VERDICT: APPROVED"
  assert_file_contains "$status_file" "state=completed"
}
|
||||
|
||||
# Scenario: the reviewer outlives the soft timeout but finishes before the
# hard timeout; the helper must keep waiting and still exit 0.
# Arguments: $1 - scratch directory for this run's files
test_soft_timeout_continues() {
  local dir=$1
  local prefix=$dir/soft-timeout
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status

  make_command "$command_file" '
sleep 3
printf "completed after soft timeout\n"
'

  local exit_code=0
  run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \
    --poll-seconds 1 \
    --soft-timeout-seconds 1 \
    --stall-warning-seconds 2 \
    --hard-timeout-seconds 8 || exit_code=$?

  assert_exit_code "$exit_code" 0
  assert_file_contains "$stdout_file" "completed after soft timeout"
  assert_file_contains "$status_file" "state=completed"
}
|
||||
|
||||
# Scenario: the reviewer writes to stderr and exits 7; the helper must
# propagate that exit code and log a failed state.
# Arguments: $1 - scratch directory for this run's files
test_nonzero_failure() {
  local dir=$1
  local prefix=$dir/nonzero-failure
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status

  make_command "$command_file" '
printf "boom\n" >&2
exit 7
'

  local exit_code=0
  run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \
    --poll-seconds 1 \
    --soft-timeout-seconds 5 \
    --stall-warning-seconds 3 \
    --hard-timeout-seconds 10 || exit_code=$?

  assert_exit_code "$exit_code" 7
  assert_file_contains "$stderr_file" "boom"
  assert_file_contains "$status_file" "state=failed"
}
|
||||
|
||||
# Scenario: the reviewer exits 0 without writing anything to stdout; the
# helper must exit non-zero and log the completed-empty-output state.
# Arguments: $1 - scratch directory for this run's files
test_empty_output_is_terminal() {
  local dir=$1
  local prefix=$dir/empty-output
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status

  make_command "$command_file" '
sleep 1
exit 0
'

  local exit_code=0
  run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \
    --poll-seconds 1 \
    --soft-timeout-seconds 5 \
    --stall-warning-seconds 3 \
    --hard-timeout-seconds 10 || exit_code=$?

  assert_nonzero_exit "$exit_code"
  assert_file_contains "$status_file" "state=completed-empty-output"
}
|
||||
|
||||
# Scenario: the helper receives SIGTERM while the reviewer is running; the
# helper must exit non-zero and the reviewer script process must be gone.
# Arguments: $1 - scratch directory for this run's files
test_signal_cleanup() {
  local dir=$1
  local prefix=$dir/signal-child
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status
  local child_pid_file=$dir/child.pid

  # The command records its own PID so the test can probe it afterwards.
  make_command "$command_file" "
printf '%s\n' \"\$\$\" > \"$child_pid_file\"
sleep 30
"

  local -a helper_args=(
    --command-file "$command_file"
    --stdout-file "$stdout_file"
    --stderr-file "$stderr_file"
    --status-file "$status_file"
    --poll-seconds 1
    --soft-timeout-seconds 5
    --stall-warning-seconds 2
    --hard-timeout-seconds 10
  )
  "$HELPER_PATH" "${helper_args[@]}" &
  local helper_pid=$!

  # Give the helper time to launch the child before interrupting it.
  sleep 2
  kill -TERM "$helper_pid"

  local exit_code=0
  wait "$helper_pid" || exit_code=$?

  assert_nonzero_exit "$exit_code"
  [[ -f "$child_pid_file" ]] || fail "child pid file was not written"

  local child_pid
  child_pid=$(cat "$child_pid_file")
  sleep 1
  if kill -0 "$child_pid" 2>/dev/null; then
    fail "child process is still alive after helper termination"
  fi
}
|
||||
|
||||
# Scenario: the reviewer runs past the hard timeout; the helper must exit
# 81 and log the needs-operator-decision state.
# Arguments: $1 - scratch directory for this run's files
test_hard_timeout_escalation() {
  local dir=$1
  local prefix=$dir/hard-timeout
  local command_file=$prefix.sh
  local stdout_file=$prefix.stdout
  local stderr_file=$prefix.stderr
  local status_file=$prefix.status

  make_command "$command_file" '
sleep 30
'

  local exit_code=0
  run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \
    --poll-seconds 1 \
    --soft-timeout-seconds 2 \
    --stall-warning-seconds 2 \
    --hard-timeout-seconds 4 || exit_code=$?

  assert_exit_code "$exit_code" 81
  assert_file_contains "$status_file" "state=needs-operator-decision"
}
|
||||
|
||||
# Run every smoke-test scenario against the helper, sharing one scratch
# directory that is removed when the script exits.
main() {
  [[ -x "$HELPER_PATH" ]] || fail "helper is not executable: $HELPER_PATH"

  local tmp_dir
  tmp_dir=$(mktemp -d)
  # Expanded now on purpose: tmp_dir is local and is out of scope by the
  # time the EXIT trap fires.
  trap "rm -rf '$tmp_dir'" EXIT

  local scenario
  for scenario in \
    test_delayed_success \
    test_soft_timeout_continues \
    test_nonzero_failure \
    test_empty_output_is_terminal \
    test_signal_cleanup \
    test_hard_timeout_escalation; do
    "$scenario" "$tmp_dir"
  done

  echo "PASS: reviewer runtime smoke tests"
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user