From 63a048a26c2f2387eb5835f87dad08d8f85cde3e Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Tue, 24 Mar 2026 11:45:58 -0500 Subject: [PATCH] Align reviewer runtime and Telegram notifications --- docs/CREATE-PLAN.md | 43 +++- docs/IMPLEMENT-PLAN.md | 35 +++- docs/README.md | 1 + docs/TELEGRAM-NOTIFICATIONS.md | 96 +++++++++ skills/create-plan/claude-code/SKILL.md | 177 ++++++++++++++--- skills/create-plan/codex/SKILL.md | 181 ++++++++++++++--- skills/create-plan/cursor/SKILL.md | 185 +++++++++++++++--- skills/create-plan/opencode/SKILL.md | 175 ++++++++++++++--- skills/implement-plan/claude-code/SKILL.md | 165 ++++++++++++++-- skills/implement-plan/codex/SKILL.md | 167 ++++++++++++++-- skills/implement-plan/cursor/SKILL.md | 171 ++++++++++++++-- skills/implement-plan/opencode/SKILL.md | 165 ++++++++++++++-- skills/reviewer-runtime/notify-telegram.sh | 99 ++++++++++ skills/reviewer-runtime/run-review.sh | 53 +++++ .../tests/claude-review-template-guard.sh | 3 + skills/reviewer-runtime/tests/smoke-test.sh | 82 ++++++++ .../tests/telegram-notify-test.sh | 158 +++++++++++++++ 17 files changed, 1756 insertions(+), 200 deletions(-) create mode 100644 docs/TELEGRAM-NOTIFICATIONS.md create mode 100755 skills/reviewer-runtime/notify-telegram.sh mode change 100644 => 100755 skills/reviewer-runtime/tests/smoke-test.sh create mode 100755 skills/reviewer-runtime/tests/telegram-notify-test.sh diff --git a/docs/CREATE-PLAN.md b/docs/CREATE-PLAN.md index 74326ae..59f4ce2 100644 --- a/docs/CREATE-PLAN.md +++ b/docs/CREATE-PLAN.md @@ -18,6 +18,7 @@ Create structured implementation plans with milestone and story tracking, and op - Claude Code: `~/.claude/skills/reviewer-runtime/run-review.sh` - OpenCode: `~/.config/opencode/skills/reviewer-runtime/run-review.sh` - Cursor: `.cursor/skills/reviewer-runtime/run-review.sh` or `~/.cursor/skills/reviewer-runtime/run-review.sh` +- Telegram notification setup is documented in [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) If dependencies are missing, stop and return: @@ -115,10 +116,13 @@ Verify Superpowers dependencies exist in your agent skills root: - Creates plans under `ai_plan/YYYY-MM-DD-/`. - Ensures `/ai_plan/` is in `.gitignore`. - Commits `.gitignore` update locally when added. -- Asks which reviewer CLI and model to use (or accepts `skip` for no review). -- Iteratively reviews the plan with the chosen reviewer (max 5 rounds) before generating files. +- Asks which reviewer CLI, model, and max rounds to use (or accepts `skip` for no review). +- Iteratively reviews the plan with the chosen reviewer (default max 10 rounds) before generating files. - Runs reviewer commands through `reviewer-runtime/run-review.sh` when available, with fallback to direct synchronous execution only if the helper is missing. +- Waits as long as the reviewer runtime keeps emitting per-minute `In progress N` heartbeats. +- Requires reviewer findings to be ordered `P0` through `P3`, with `P3` explicitly non-blocking. - Captures reviewer stderr and helper status logs for diagnostics and retains them on failed, empty-output, or operator-decision review rounds. +- Sends completion notifications through Telegram only when the shared setup in [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) is installed and configured. - Produces: - `original-plan.md` - `final-transcript.md` @@ -130,13 +134,24 @@ Verify Superpowers dependencies exist in your agent skills root: After the plan is created (design + milestones + stories), the skill sends it to a second model for review: -1. **Configure** — user picks a reviewer CLI (`codex`, `claude`, `cursor`) and model, or skips +1. **Configure** — user picks a reviewer CLI (`codex`, `claude`, `cursor`), a model, and optional max rounds (default 10), or skips 2. **Prepare** — plan payload and a bash reviewer command script are written to temp files 3. **Run** — the command script is executed through `reviewer-runtime/run-review.sh` when installed -4. **Feedback** — reviewer evaluates correctness, risks, missing steps, alternatives, security -5. **Revise** — the planning agent addresses each issue and re-submits -6. **Repeat** — up to 5 rounds until the reviewer returns `VERDICT: APPROVED` -7. **Finalize** — approved plan is used to generate the plan file package +4. **Feedback** — reviewer evaluates correctness, risks, missing steps, alternatives, security, and returns `## Summary`, `## Findings`, and `## Verdict` +5. **Prioritize** — findings are ordered `P0`, `P1`, `P2`, `P3` +6. **Revise** — the planning agent addresses findings in priority order and re-submits +7. **Repeat** — up to max rounds until the reviewer returns `VERDICT: APPROVED` +8. **Finalize** — approved plan is used to generate the plan file package + +### Reviewer Output Contract + +- `P0` = total blocker +- `P1` = major risk +- `P2` = must-fix before approval +- `P3` = cosmetic / nice to have +- Each severity section should use `- None.` when empty +- `VERDICT: APPROVED` is valid only when no `P0`, `P1`, or `P2` findings remain +- `P3` findings are non-blocking, but the caller should still try to fix them when cheap and safe ### Runtime Artifacts @@ -153,16 +168,18 @@ The review flow may create these temp artifacts: Status log lines use this format: ```text -ts= level= state= elapsed_s= pid= stdout_bytes= stderr_bytes= note="" +ts= level= state= elapsed_s= pid= stdout_bytes= stderr_bytes= note="" ``` -`stall-warning` is a heartbeat/status-log state only. It is not a terminal review result. +`in-progress` is the liveness heartbeat emitted roughly once per minute with `note="In progress N"`. +`stall-warning` is a non-terminal status-log state only. It does not mean the caller should stop waiting if `in-progress` heartbeats continue. ### Failure Handling - `completed-empty-output` means the reviewer exited without producing review text; surface `.stderr` and `.status`, then retry only after diagnosing the cause. -- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to keep waiting, abort, or retry with different parameters. +- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to extend the timeout, abort, or retry with different parameters. - Successful rounds clean up temp artifacts. Failed, empty-output, and operator-decision rounds should retain `.stderr`, `.status`, and `.runner.out` until diagnosed. +- As long as fresh `in-progress` heartbeats continue to arrive roughly once per minute, the caller should keep waiting. ### Supported Reviewer CLIs @@ -178,6 +195,12 @@ For all three CLIs, the preferred execution path is: 2. run that script through `reviewer-runtime/run-review.sh` 3. fall back to direct synchronous execution only if the helper is missing or not executable +## Notifications + +- Telegram is the only supported completion notification path. +- Shared setup: [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) +- Notification failures are non-blocking, but they must be surfaced to the user. + ## Template Guardrails All plan templates now include guardrail sections that enforce: diff --git a/docs/IMPLEMENT-PLAN.md b/docs/IMPLEMENT-PLAN.md index bc406f6..ea19e38 100644 --- a/docs/IMPLEMENT-PLAN.md +++ b/docs/IMPLEMENT-PLAN.md @@ -25,6 +25,7 @@ Execute an existing plan (created by `create-plan`) in an isolated git worktree, - Claude Code: `~/.claude/skills/reviewer-runtime/run-review.sh` - OpenCode: `~/.config/opencode/skills/reviewer-runtime/run-review.sh` - Cursor: `.cursor/skills/reviewer-runtime/run-review.sh` or `~/.cursor/skills/reviewer-runtime/run-review.sh` +- Telegram notification setup is documented in [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) If dependencies are missing, stop and return: @@ -133,10 +134,13 @@ Verify Superpowers execution dependencies exist in your agent skills root: - Runs lint/typecheck/tests as a gate before each milestone review. - Sends each milestone to a reviewer CLI for approval (max rounds configurable, default 10). - Runs reviewer commands through `reviewer-runtime/run-review.sh` when available, with fallback to direct synchronous execution only if the helper is missing. +- Waits as long as the reviewer runtime keeps emitting per-minute `In progress N` heartbeats. +- Requires reviewer findings to be ordered `P0` through `P3`, with `P3` explicitly non-blocking. - Captures reviewer stderr and helper status logs for diagnostics and retains them on failed, empty-output, or operator-decision review rounds. - Commits each milestone locally only after reviewer approval (does not push). - After all milestones approved, merges worktree branch to parent and deletes worktree. - Supports resume: detects existing worktree and `in-dev`/`completed` stories. +- Sends completion notifications through Telegram only when the shared setup in [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) is installed and configured. ## Milestone Review Loop @@ -145,11 +149,22 @@ After each milestone is implemented and verified, the skill sends it to a second 1. **Configure** — user picks a reviewer CLI (`codex`, `claude`, `cursor`) and model, or skips 2. **Prepare** — milestone payload and a bash reviewer command script are written to temp files 3. **Run** — the command script is executed through `reviewer-runtime/run-review.sh` when installed -4. **Feedback** — reviewer evaluates correctness, acceptance criteria, code quality, test coverage, security -5. **Revise** — the implementing agent addresses each issue, re-verifies, and re-submits -6. **Repeat** — up to max rounds (default 10) until the reviewer returns `VERDICT: APPROVED` +4. **Feedback** — reviewer evaluates correctness, acceptance criteria, code quality, test coverage, security, and returns `## Summary`, `## Findings`, and `## Verdict` +5. **Prioritize** — findings are ordered `P0`, `P1`, `P2`, `P3` +6. **Revise** — the implementing agent addresses findings in priority order, re-verifies, and re-submits +7. **Repeat** — up to max rounds (default 10) until the reviewer returns `VERDICT: APPROVED` 7. **Approve** — milestone is marked approved in `story-tracker.md` +### Reviewer Output Contract + +- `P0` = total blocker +- `P1` = major risk +- `P2` = must-fix before approval +- `P3` = cosmetic / nice to have +- Each severity section should use `- None.` when empty +- `VERDICT: APPROVED` is valid only when no `P0`, `P1`, or `P2` findings remain +- `P3` findings are non-blocking, but the caller should still try to fix them when cheap and safe + ### Runtime Artifacts The milestone review flow may create these temp artifacts: @@ -165,16 +180,18 @@ The milestone review flow may create these temp artifacts: Status log lines use this format: ```text -ts= level= state= elapsed_s= pid= stdout_bytes= stderr_bytes= note="" +ts= level= state= elapsed_s= pid= stdout_bytes= stderr_bytes= note="" ``` -`stall-warning` is a heartbeat/status-log state only. It is not a terminal review result. +`in-progress` is the liveness heartbeat emitted roughly once per minute with `note="In progress N"`. +`stall-warning` is a non-terminal status-log state only. It does not mean the caller should stop waiting if `in-progress` heartbeats continue. ### Failure Handling - `completed-empty-output` means the reviewer exited without producing review text; surface `.stderr` and `.status`, then retry only after diagnosing the cause. -- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to keep waiting, abort, or retry with different parameters. +- `needs-operator-decision` means the helper reached hard-timeout escalation; surface `.status` and decide whether to extend the timeout, abort, or retry with different parameters. - Successful rounds clean up temp artifacts. Failed, empty-output, and operator-decision rounds should retain `.stderr`, `.status`, and `.runner.out` until diagnosed. +- As long as fresh `in-progress` heartbeats continue to arrive roughly once per minute, the caller should keep waiting. ### Supported Reviewer CLIs @@ -190,6 +207,12 @@ For all three CLIs, the preferred execution path is: 2. run that script through `reviewer-runtime/run-review.sh` 3. fall back to direct synchronous execution only if the helper is missing or not executable +## Notifications + +- Telegram is the only supported completion notification path. +- Shared setup: [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) +- Notification failures are non-blocking, but they must be surfaced to the user. + The helper also supports manual override flags for diagnostics: ```bash diff --git a/docs/README.md b/docs/README.md index 1e4a47c..aa6dc96 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,6 +7,7 @@ This directory contains user-facing docs for each skill. - [ATLASSIAN.md](./ATLASSIAN.md) — Includes requirements, generated bundle sync, install, auth, safety rules, and usage examples for the Atlassian skill. - [CREATE-PLAN.md](./CREATE-PLAN.md) — Includes requirements, install, verification, and execution workflow for create-plan. - [IMPLEMENT-PLAN.md](./IMPLEMENT-PLAN.md) — Includes requirements, install, verification, and milestone review workflow for implement-plan. +- [TELEGRAM-NOTIFICATIONS.md](./TELEGRAM-NOTIFICATIONS.md) — Shared Telegram notification setup used by reviewer-driven skills. - [WEB-AUTOMATION.md](./WEB-AUTOMATION.md) — Includes requirements, install, dependency verification, and usage examples for web-automation. ## Repo Setup diff --git a/docs/TELEGRAM-NOTIFICATIONS.md b/docs/TELEGRAM-NOTIFICATIONS.md new file mode 100644 index 0000000..77b0c4f --- /dev/null +++ b/docs/TELEGRAM-NOTIFICATIONS.md @@ -0,0 +1,96 @@ +# TELEGRAM-NOTIFICATIONS + +## Purpose + +Shared setup for Telegram completion notifications used by reviewer-driven skills such as `create-plan` and `implement-plan`. + +## Requirements + +- Telegram bot token in `TELEGRAM_BOT_TOKEN` +- Telegram chat id in `TELEGRAM_CHAT_ID` +- Notification helper installed beside the shared reviewer runtime: + - Codex: `~/.codex/skills/reviewer-runtime/notify-telegram.sh` + - Claude Code: `~/.claude/skills/reviewer-runtime/notify-telegram.sh` + - OpenCode: `~/.config/opencode/skills/reviewer-runtime/notify-telegram.sh` + - Cursor: `.cursor/skills/reviewer-runtime/notify-telegram.sh` or `~/.cursor/skills/reviewer-runtime/notify-telegram.sh` + +## Install + +The helper ships from `skills/reviewer-runtime/` together with `run-review.sh`. + +### Codex + +```bash +mkdir -p ~/.codex/skills/reviewer-runtime +cp -R skills/reviewer-runtime/* ~/.codex/skills/reviewer-runtime/ +``` + +### Claude Code + +```bash +mkdir -p ~/.claude/skills/reviewer-runtime +cp -R skills/reviewer-runtime/* ~/.claude/skills/reviewer-runtime/ +``` + +### OpenCode + +```bash +mkdir -p ~/.config/opencode/skills/reviewer-runtime +cp -R skills/reviewer-runtime/* ~/.config/opencode/skills/reviewer-runtime/ +``` + +### Cursor + +Repo-local install: + +```bash +mkdir -p .cursor/skills/reviewer-runtime +cp -R skills/reviewer-runtime/* .cursor/skills/reviewer-runtime/ +``` + +Global install: + +```bash +mkdir -p ~/.cursor/skills/reviewer-runtime +cp -R skills/reviewer-runtime/* ~/.cursor/skills/reviewer-runtime/ +``` + +## Verify Installation + +```bash +test -x ~/.codex/skills/reviewer-runtime/notify-telegram.sh || true +test -x ~/.claude/skills/reviewer-runtime/notify-telegram.sh || true +test -x ~/.config/opencode/skills/reviewer-runtime/notify-telegram.sh || true +test -x .cursor/skills/reviewer-runtime/notify-telegram.sh || test -x ~/.cursor/skills/reviewer-runtime/notify-telegram.sh || true +``` + +## Configure Telegram + +Export the required variables before running a skill that sends completion notifications: + +```bash +export TELEGRAM_BOT_TOKEN="" +export TELEGRAM_CHAT_ID="" +``` + +Optional: + +```bash +export TELEGRAM_API_BASE_URL="https://api.telegram.org" +``` + +## Test the Helper + +Example: + +```bash +TELEGRAM_BOT_TOKEN="" \ +TELEGRAM_CHAT_ID="" \ +skills/reviewer-runtime/notify-telegram.sh --message "Telegram notification test" +``` + +## Rules + +- Telegram is the only supported completion notification path for these skills. +- Notification failures are non-blocking, but they must be surfaced to the user. +- Skills should report when Telegram is not configured instead of silently pretending a notification was sent. diff --git a/skills/create-plan/claude-code/SKILL.md b/skills/create-plan/claude-code/SKILL.md index dc6ebe5..1ff3303 100644 --- a/skills/create-plan/claude-code/SKILL.md +++ b/skills/create-plan/claude-code/SKILL.md @@ -47,7 +47,10 @@ If the user has already specified a reviewer CLI and model (e.g., "create a plan - For `cursor`: **run `cursor-agent models` first** to see your account's available models (availability varies by subscription) - Accept any model string the user provides -Store the chosen `REVIEWER_CLI` and `REVIEWER_MODEL` for Phase 6 (Iterative Plan Review). +3. **Max review rounds for the plan?** (default: 10) + - If the user does not provide a value, set `MAX_ROUNDS=10`. + +Store the chosen `REVIEWER_CLI`, `REVIEWER_MODEL`, and `MAX_ROUNDS` for Phase 6 (Iterative Plan Review). ### Phase 4: Design (REQUIRED SUB-SKILL) - Invoke `superpowers:brainstorming` explicitly. @@ -61,7 +64,7 @@ Store the chosen `REVIEWER_CLI` and `REVIEWER_MODEL` for Phase 6 (Iterative Plan ### Phase 6: Iterative Plan Review -Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (max 5 rounds). +Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (default max 10 rounds). **Skip this phase entirely if reviewer was set to `skip`.** @@ -86,10 +89,60 @@ Resolve the shared reviewer helper from the installed Claude Code skills directo REVIEWER_RUNTIME=~/.claude/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Plan to Temp File Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/plan-${REVIEW_ID}.md`. +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script: @@ -113,8 +166,21 @@ codex exec \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -133,8 +199,21 @@ $(cat /tmp/plan-${REVIEW_ID}.md) 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -155,8 +234,21 @@ cursor-agent -p \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json ``` @@ -170,13 +262,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/plan-review-${REVIEW_ID}.sh \ --stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \ - --status-file /tmp/plan-review-${REVIEW_ID}.status + --status-file /tmp/plan-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/plan-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -186,6 +281,13 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/plan-review-${REVIEW_ID}.runner.out /tmp/plan-review-${REVIEW_ID}.md +``` + +Fallback is allowed only when the helper is missing or not executable. #### Step 4: Read Review & Check Verdict @@ -202,17 +304,19 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I [Reviewer feedback] ``` -3. Check verdict: - - **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace) - - **VERDICT: REVISE** → go to Step 5 - - No clear verdict but positive / no actionable items → treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings → proceed to Phase 7 (Initialize workspace) + - **VERDICT: APPROVED** with only `P3` findings → optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding → go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` → treat as approved - Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters - - Max rounds (5) reached → proceed with warning + - Helper state `needs-operator-decision` → surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters + - Max rounds (`MAX_ROUNDS`) reached → present the outcome to the user for a manual decision (proceed or stop) #### Step 5: Revise the Plan -Address each issue the reviewer raised. Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. +Address the reviewer findings in priority order (`P0` → `P1` → `P2`, then `P3` when practical). Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. Summarize revisions for the user: @@ -223,7 +327,9 @@ Summarize revisions for the user: If a revision contradicts the user's explicit requirements, skip it and note it for the user. -#### Step 6: Re-submit to Reviewer (Rounds 2-5) +#### Step 6: Re-submit to Reviewer (Rounds 2-N) + +Rewrite `/tmp/plan-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly. **If `REVIEWER_CLI` is `codex`:** @@ -237,8 +343,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -260,8 +366,8 @@ $(cat /tmp/plan-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review the full plan. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review the full plan using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -282,8 +388,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md @@ -302,7 +408,7 @@ Return to Step 4. **Status:** Approved after N round(s) [or] -**Status:** Max rounds (5) reached — not fully approved +**Status:** Max rounds (`MAX_ROUNDS`) reached — not fully approved [Final feedback / remaining concerns] ``` @@ -354,6 +460,27 @@ When handing off to execution, instruct: Private plan files under `~/.claude/plans/` are planning artifacts and must not be used as execution source of truth. +### Phase 10: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed Claude Code skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.claude/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the create-plan run (approved, max rounds reached, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "create-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Tracker Discipline (MANDATORY) **ALWAYS update `story-tracker.md` before/after each story. NEVER proceed with stale tracker state.** @@ -392,6 +519,7 @@ After completing any story: - [ ] `.gitignore` ignore-rule commit was created if needed - [ ] Plan directory created under `ai_plan/YYYY-MM-DD-/` - [ ] Reviewer configured or explicitly skipped +- [ ] Max review rounds confirmed (default: 10) - [ ] Plan review completed (approved or max rounds) — or skipped - [ ] `original-plan.md` copied from `~/.claude/plans/` plan file - [ ] `final-transcript.md` present @@ -399,6 +527,7 @@ After completing any story: - [ ] `story-tracker.md` created with all stories as `pending` - [ ] `continuation-runbook.md` present - [ ] Handoff explicitly says to read runbook first and execute from plan folder +- [ ] Telegram completion notification attempted if configured ## Exit Triggers for Question Phase User says: "ready", "done", "let's plan", "proceed", "enough questions" diff --git a/skills/create-plan/codex/SKILL.md b/skills/create-plan/codex/SKILL.md index d3f9e03..040f9c3 100644 --- a/skills/create-plan/codex/SKILL.md +++ b/skills/create-plan/codex/SKILL.md @@ -72,7 +72,10 @@ If the user has already specified a reviewer CLI and model (e.g., "create a plan - For `cursor`: **run `cursor-agent models` first** to see your account's available models (availability varies by subscription) - Accept any model string the user provides -Store the chosen `REVIEWER_CLI` and `REVIEWER_MODEL` for Phase 6 (Iterative Plan Review). +3. **Max review rounds for the plan?** (default: 10) + - If the user does not provide a value, set `MAX_ROUNDS=10`. + +Store the chosen `REVIEWER_CLI`, `REVIEWER_MODEL`, and `MAX_ROUNDS` for Phase 6 (Iterative Plan Review). ### Phase 4: Design (REQUIRED SUB-SKILL) @@ -84,7 +87,7 @@ Invoke `superpowers:writing-plans`, then break work into milestones and bite-siz ### Phase 6: Iterative Plan Review -Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (max 5 rounds). +Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (default max 10 rounds). **Skip this phase entirely if reviewer was set to `skip`.** @@ -109,10 +112,60 @@ Resolve the shared reviewer helper from the installed Codex skills directory: REVIEWER_RUNTIME=~/.codex/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Plan to Temp File Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/plan-${REVIEW_ID}.md`. +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script: @@ -136,8 +189,21 @@ codex exec \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -156,8 +222,21 @@ $(cat /tmp/plan-${REVIEW_ID}.md) 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -178,8 +257,21 @@ cursor-agent -p \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json ``` @@ -193,13 +285,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/plan-review-${REVIEW_ID}.sh \ --stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \ - --status-file /tmp/plan-review-${REVIEW_ID}.status + --status-file /tmp/plan-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/plan-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -209,6 +304,11 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/plan-review-${REVIEW_ID}.runner.out /tmp/plan-review-${REVIEW_ID}.md +``` Fallback is allowed only when the helper is missing or not executable. @@ -227,17 +327,19 @@ Fallback is allowed only when the helper is missing or not executable. [Reviewer feedback] ``` -3. Check verdict: - - **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace) - - **VERDICT: REVISE** → go to Step 5 - - No clear verdict but positive / no actionable items → treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings → proceed to Phase 7 (Initialize workspace) + - **VERDICT: APPROVED** with only `P3` findings → optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding → go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` → treat as approved - Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters - - Max rounds (5) reached → proceed with warning + - Helper state `needs-operator-decision` → surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters + - Max rounds (`MAX_ROUNDS`) reached → present the outcome to the user for a manual decision (proceed or stop) #### Step 5: Revise the Plan -Address each issue the reviewer raised. Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. +Address the reviewer findings in priority order (`P0` → `P1` → `P2`, then `P3` when practical). Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. Summarize revisions for the user: @@ -248,7 +350,9 @@ Summarize revisions for the user: If a revision contradicts the user's explicit requirements, skip it and note it for the user. -#### Step 6: Re-submit to Reviewer (Rounds 2-5) +#### Step 6: Re-submit to Reviewer (Rounds 2-N) + +Rewrite `/tmp/plan-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly. **If `REVIEWER_CLI` is `codex`:** @@ -262,8 +366,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -285,8 +389,8 @@ $(cat /tmp/plan-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review the full plan. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review the full plan using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -307,8 +411,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md @@ -327,7 +431,7 @@ Return to Step 4. **Status:** Approved after N round(s) [or] -**Status:** Max rounds (5) reached — not fully approved +**Status:** Max rounds (`MAX_ROUNDS`) reached — not fully approved [Final feedback / remaining concerns] ``` @@ -374,19 +478,41 @@ Always instruct the executing agent: Do not rely on planner-private files during implementation. +### Phase 10: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed Codex skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.codex/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the create-plan run (approved, max rounds reached, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "create-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Quick Reference | Phase | Action | Required Output | |---|---|---| | 1 | Analyze codebase/context | Constraints and known patterns | | 2 | Gather requirements (one question at a time) | Confirmed scope and success criteria | -| 3 | Configure reviewer CLI and model | `REVIEWER_CLI` and `REVIEWER_MODEL` (or `skip`) | +| 3 | Configure reviewer CLI and model | `REVIEWER_CLI`, `REVIEWER_MODEL`, `MAX_ROUNDS` (or `skip`) | | 4 | Invoke `superpowers:brainstorming` | Chosen design approach | | 5 | Invoke `superpowers:writing-plans` | Milestones and bite-sized stories | -| 6 | Iterative plan review (max 5 rounds) | Reviewer approval or max-rounds warning | +| 6 | Iterative plan review (max `MAX_ROUNDS` rounds) | Reviewer approval or max-rounds warning | | 7 | Initialize `ai_plan/` + `.gitignore` | Local planning workspace ready | | 8 | Build plan package from templates | Full plan folder with required files | | 9 | Handoff with runbook-first instruction | Resumable execution context | +| 10 | Send Telegram completion notification | User notified or notification status reported | ## Execution Rules to Include in Plan (MANDATORY) @@ -413,6 +539,7 @@ Do not rely on planner-private files during implementation. - Handoff without explicit "read runbook first" direction. - Skipping the reviewer phase without explicit user opt-out. - Not capturing the Codex session ID for resume in subsequent review rounds. +- Using any completion notification path other than Telegram. ## Rationalizations and Counters @@ -440,6 +567,7 @@ Do not rely on planner-private files during implementation. - [ ] `.gitignore` ignore-rule commit was created if needed - [ ] Plan directory created under `ai_plan/YYYY-MM-DD-/` - [ ] Reviewer configured or explicitly skipped +- [ ] Max review rounds confirmed (default: 10) - [ ] Plan review completed (approved or max rounds) — or skipped - [ ] `original-plan.md` present - [ ] `final-transcript.md` present @@ -447,3 +575,4 @@ Do not rely on planner-private files during implementation. - [ ] `story-tracker.md` present - [ ] `continuation-runbook.md` present - [ ] Handoff explicitly says to read runbook first and execute from plan folder +- [ ] Telegram completion notification attempted if configured diff --git a/skills/create-plan/cursor/SKILL.md b/skills/create-plan/cursor/SKILL.md index dc7e9b7..4ec2cdd 100644 --- a/skills/create-plan/cursor/SKILL.md +++ b/skills/create-plan/cursor/SKILL.md @@ -73,7 +73,10 @@ If the user has already specified a reviewer CLI and model (e.g., "create a plan - For `cursor`: **run `cursor-agent models` first** to see your account's available models (availability varies by subscription) - Accept any model string the user provides -Store the chosen `REVIEWER_CLI` and `REVIEWER_MODEL` for Phase 6 (Iterative Plan Review). +3. **Max review rounds for the plan?** (default: 10) + - If the user does not provide a value, set `MAX_ROUNDS=10`. + +Store the chosen `REVIEWER_CLI`, `REVIEWER_MODEL`, and `MAX_ROUNDS` for Phase 6 (Iterative Plan Review). ### Phase 4: Design (REQUIRED SUB-SKILL) @@ -86,7 +89,7 @@ Story IDs: `S-{milestone}{sequence}`. ### Phase 6: Iterative Plan Review -Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (max 5 rounds). +Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (default max 10 rounds). **Skip this phase entirely if reviewer was set to `skip`.** @@ -115,10 +118,60 @@ else fi ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Plan to Temp File Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/plan-${REVIEW_ID}.md`. +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script: @@ -142,8 +195,21 @@ codex exec \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -162,8 +228,21 @@ $(cat /tmp/plan-${REVIEW_ID}.md) 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -184,8 +263,21 @@ cursor-agent -p \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json ``` @@ -205,13 +297,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/plan-review-${REVIEW_ID}.sh \ --stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \ - --status-file /tmp/plan-review-${REVIEW_ID}.status + --status-file /tmp/plan-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/plan-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -221,6 +316,11 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/plan-review-${REVIEW_ID}.runner.out /tmp/plan-review-${REVIEW_ID}.md +``` #### Step 4: Read Review & Check Verdict @@ -237,17 +337,19 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I [Reviewer feedback] ``` -3. Check verdict: - - **VERDICT: APPROVED** → proceed to Phase 7 (Initialize workspace) - - **VERDICT: REVISE** → go to Step 5 - - No clear verdict but positive / no actionable items → treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings → proceed to Phase 7 (Initialize workspace) + - **VERDICT: APPROVED** with only `P3` findings → optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding → go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` → treat as approved - Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters - - Max rounds (5) reached → proceed with warning + - Helper state `needs-operator-decision` → surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters + - Max rounds (`MAX_ROUNDS`) reached → present the outcome to the user for a manual decision (proceed or stop) #### Step 5: Revise the Plan -Address each issue the reviewer raised. Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. +Address the reviewer findings in priority order (`P0` → `P1` → `P2`, then `P3` when practical). Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. Summarize revisions for the user: @@ -258,7 +360,9 @@ Summarize revisions for the user: If a revision contradicts the user's explicit requirements, skip it and note it for the user. -#### Step 6: Re-submit to Reviewer (Rounds 2-5) +#### Step 6: Re-submit to Reviewer (Rounds 2-N) + +Rewrite `/tmp/plan-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly. **If `REVIEWER_CLI` is `codex`:** @@ -272,8 +376,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -295,8 +399,8 @@ $(cat /tmp/plan-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review the full plan. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review the full plan using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -317,8 +421,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md @@ -337,7 +441,7 @@ Return to Step 4. **Status:** Approved after N round(s) [or] -**Status:** Max rounds (5) reached — not fully approved +**Status:** Max rounds (`MAX_ROUNDS`) reached — not fully approved [Final feedback / remaining concerns] ``` @@ -384,19 +488,45 @@ Always instruct the executing agent: Do not rely on planner-private files during implementation. +### Phase 10: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from Cursor's installed skills directory: + +```bash +if [ -x .cursor/skills/reviewer-runtime/notify-telegram.sh ]; then + TELEGRAM_NOTIFY_RUNTIME=.cursor/skills/reviewer-runtime/notify-telegram.sh +else + TELEGRAM_NOTIFY_RUNTIME=~/.cursor/skills/reviewer-runtime/notify-telegram.sh +fi +``` + +On every terminal outcome for the create-plan run (approved, max rounds reached, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "create-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Quick Reference | Phase | Action | Required Output | |---|---|---| | 1 | Analyze codebase/context | Constraints and known patterns | | 2 | Gather requirements (one question at a time) | Confirmed scope and success criteria | -| 3 | Configure reviewer CLI and model | `REVIEWER_CLI` and `REVIEWER_MODEL` (or `skip`) | +| 3 | Configure reviewer CLI and model | `REVIEWER_CLI`, `REVIEWER_MODEL`, `MAX_ROUNDS` (or `skip`) | | 4 | Invoke `superpowers:brainstorming` | Chosen design approach | | 5 | Invoke `superpowers:writing-plans` | Milestones and bite-sized stories | -| 6 | Iterative plan review (max 5 rounds) | Reviewer approval or max-rounds warning | +| 6 | Iterative plan review (max `MAX_ROUNDS` rounds) | Reviewer approval or max-rounds warning | | 7 | Initialize `ai_plan/` + `.gitignore` | Local planning workspace ready | | 8 | Build plan package from templates | Full plan folder with required files | | 9 | Handoff with runbook-first instruction | Resumable execution context | +| 10 | Send Telegram completion notification | User notified or notification status reported | ## Tracker Discipline (MANDATORY) @@ -437,6 +567,7 @@ After completing any story: - Omitting one or more required files in the plan package. - Handoff without explicit "read runbook first" direction. - Skipping the reviewer phase without explicit user opt-out. +- Using any completion notification path other than Telegram. ## Red Flags - Stop and Correct @@ -454,6 +585,7 @@ After completing any story: - [ ] `.gitignore` ignore-rule commit was created if needed - [ ] Plan directory created under `ai_plan/YYYY-MM-DD-/` - [ ] Reviewer configured or explicitly skipped +- [ ] Max review rounds confirmed (default: 10) - [ ] Plan review completed (approved or max rounds) — or skipped - [ ] `original-plan.md` present - [ ] `final-transcript.md` present @@ -461,6 +593,7 @@ After completing any story: - [ ] `story-tracker.md` present - [ ] `continuation-runbook.md` present - [ ] Handoff explicitly says to read runbook first and execute from plan folder +- [ ] Telegram completion notification attempted if configured ## Exit Triggers for Question Phase User says: "ready", "done", "let's plan", "proceed", "enough questions" diff --git a/skills/create-plan/opencode/SKILL.md b/skills/create-plan/opencode/SKILL.md index 28ee623..ca8702f 100644 --- a/skills/create-plan/opencode/SKILL.md +++ b/skills/create-plan/opencode/SKILL.md @@ -59,7 +59,10 @@ If the user has already specified a reviewer CLI and model (e.g., "create a plan - For `cursor`: **run `cursor-agent models` first** to see your account's available models (availability varies by subscription) - Accept any model string the user provides -Store the chosen `REVIEWER_CLI` and `REVIEWER_MODEL` for Phase 7 (Iterative Plan Review). +3. **Max review rounds for the plan?** (default: 10) + - If the user does not provide a value, set `MAX_ROUNDS=10`. + +Store the chosen `REVIEWER_CLI`, `REVIEWER_MODEL`, and `MAX_ROUNDS` for Phase 7 (Iterative Plan Review). ### Phase 5: Design (REQUIRED SUB-SKILL) @@ -78,7 +81,7 @@ Story IDs: `S-{milestone}{sequence}`. ### Phase 7: Iterative Plan Review -Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (max 5 rounds). +Send the plan to the configured reviewer CLI for feedback. Revise and re-submit until approved (default max 10 rounds). **Skip this phase entirely if reviewer was set to `skip`.** @@ -103,10 +106,60 @@ Resolve the shared reviewer helper from the installed OpenCode skills directory: REVIEWER_RUNTIME=~/.config/opencode/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/plan-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Plan to Temp File Write the complete plan (milestones, stories, design decisions, specs) to `/tmp/plan-${REVIEW_ID}.md`. +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/plan-review-${REVIEW_ID}.sh` as a bash script: @@ -130,8 +183,21 @@ codex exec \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -150,8 +216,21 @@ $(cat /tmp/plan-${REVIEW_ID}.md) 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -172,8 +251,21 @@ cursor-agent -p \ 4. Alternatives — Is there a simpler or better approach? 5. Security — Any security concerns? -Be specific and actionable. If the plan is solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json ``` @@ -187,13 +279,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/plan-review-${REVIEW_ID}.sh \ --stdout-file /tmp/plan-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/plan-review-${REVIEW_ID}.stderr \ - --status-file /tmp/plan-review-${REVIEW_ID}.status + --status-file /tmp/plan-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/plan-review-${REVIEW_ID}.sh >/tmp/plan-review-${REVIEW_ID}.runner.out 2>/tmp/plan-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/plan-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -203,6 +298,11 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/plan-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/plan-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/plan-review-${REVIEW_ID}.runner.out /tmp/plan-review-${REVIEW_ID}.md +``` #### Step 4: Read Review & Check Verdict @@ -219,17 +319,19 @@ jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_I [Reviewer feedback] ``` -3. Check verdict: - - **VERDICT: APPROVED** → proceed to Phase 8 (Initialize workspace) - - **VERDICT: REVISE** → go to Step 5 - - No clear verdict but positive / no actionable items → treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings → proceed to Phase 8 (Initialize workspace) + - **VERDICT: APPROVED** with only `P3` findings → optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding → go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` → treat as approved - Helper state `completed-empty-output` → treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` → surface status log and decide whether to keep waiting, abort, or retry with different helper parameters - - Max rounds (5) reached → proceed with warning + - Helper state `needs-operator-decision` → surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters + - Max rounds (`MAX_ROUNDS`) reached → present the outcome to the user for a manual decision (proceed or stop) #### Step 5: Revise the Plan -Address each issue the reviewer raised. Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. +Address the reviewer findings in priority order (`P0` → `P1` → `P2`, then `P3` when practical). Update the plan in conversation context and rewrite `/tmp/plan-${REVIEW_ID}.md`. Summarize revisions for the user: @@ -240,7 +342,9 @@ Summarize revisions for the user: If a revision contradicts the user's explicit requirements, skip it and note it for the user. -#### Step 6: Re-submit to Reviewer (Rounds 2-5) +#### Step 6: Re-submit to Reviewer (Rounds 2-N) + +Rewrite `/tmp/plan-review-${REVIEW_ID}.sh` for the next round. The script should contain the reviewer invocation only; do not run it directly. **If `REVIEWER_CLI` is `codex`:** @@ -254,8 +358,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -277,8 +381,8 @@ $(cat /tmp/plan-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review the full plan. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review the full plan using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -299,8 +403,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/plan-review-${REVIEW_ID}.json jq -r '.result' /tmp/plan-review-${REVIEW_ID}.json > /tmp/plan-review-${REVIEW_ID}.md @@ -319,7 +423,7 @@ Return to Step 4. **Status:** Approved after N round(s) [or] -**Status:** Max rounds (5) reached — not fully approved +**Status:** Max rounds (`MAX_ROUNDS`) reached — not fully approved [Final feedback / remaining concerns] ``` @@ -364,6 +468,27 @@ Use templates from this skill's `templates/` folder. Always instruct the executing agent: > Read `ai_plan/YYYY-MM-DD-/continuation-runbook.md` first, then execute from `ai_plan` files only. +### Phase 11: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed OpenCode skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.config/opencode/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the create-plan run (approved, max rounds reached, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "create-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Tracker Discipline (MANDATORY) Before starting any story: @@ -400,6 +525,7 @@ After completing any story: - [ ] `.gitignore` ignore-rule commit was created if needed - [ ] Plan directory created under `ai_plan/YYYY-MM-DD-/` - [ ] Reviewer configured or explicitly skipped +- [ ] Max review rounds confirmed (default: 10) - [ ] Plan review completed (approved or max rounds) — or skipped - [ ] `original-plan.md` present - [ ] `final-transcript.md` present @@ -407,6 +533,7 @@ After completing any story: - [ ] `story-tracker.md` created with all stories as `pending` - [ ] `continuation-runbook.md` present - [ ] Handoff explicitly says to read runbook first and execute from plan folder +- [ ] Telegram completion notification attempted if configured ## Exit Triggers for Question Phase User says: "ready", "done", "let's plan", "proceed", "enough questions" diff --git a/skills/implement-plan/claude-code/SKILL.md b/skills/implement-plan/claude-code/SKILL.md index e157f22..c518cc7 100644 --- a/skills/implement-plan/claude-code/SKILL.md +++ b/skills/implement-plan/claude-code/SKILL.md @@ -151,6 +151,20 @@ Resolve the shared runtime helper path before writing the command script: REVIEWER_RUNTIME=~/.claude/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Review Payload Write to `/tmp/milestone-${REVIEW_ID}.md`: @@ -176,6 +190,42 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`: [test output with pass/fail counts] ``` +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script: @@ -201,8 +251,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -222,8 +286,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -246,8 +324,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -261,13 +353,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/milestone-review-${REVIEW_ID}.sh \ --stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \ - --status-file /tmp/milestone-review-${REVIEW_ID}.status + --status-file /tmp/milestone-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/milestone-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -277,6 +372,11 @@ jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review- ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/milestone-review-${REVIEW_ID}.runner.out /tmp/milestone-review-${REVIEW_ID}.md +``` Fallback is allowed only when the helper is missing or not executable. @@ -295,17 +395,19 @@ Fallback is allowed only when the helper is missing or not executable. [Reviewer feedback] ``` -4. Check verdict: - - **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve) - - **VERDICT: REVISE** -> go to Step 5 - - No clear verdict but positive / no actionable items -> treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings -> proceed to Phase 4 Step 6 (commit & approve) + - **VERDICT: APPROVED** with only `P3` findings -> optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding -> go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` -> treat as approved - Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters + - Helper state `needs-operator-decision` -> surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters - Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop) #### Step 5: Address Feedback & Re-verify -1. Address each issue the reviewer raised (do NOT commit yet). +1. Address the reviewer findings in priority order (`P0` -> `P1` -> `P2`, then `P3` when practical) (do NOT commit yet). 2. Re-run verification (lint/typecheck/tests) — all must pass. 3. Update `/tmp/milestone-${REVIEW_ID}.md` with new diff and verification output. @@ -334,8 +436,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -357,12 +459,11 @@ $(cat /tmp/milestone-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ - --setting-sources user \ - > /tmp/milestone-review-${REVIEW_ID}.md + --setting-sources user ``` **If `REVIEWER_CLI` is `cursor`:** @@ -380,8 +481,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -443,6 +544,27 @@ Present summary: **Branch:** implement/ (merged and deleted) ``` +### Phase 8: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed Claude Code skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.claude/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the implement-plan run (fully completed, stopped after max rounds, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "implement-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Tracker Discipline (MANDATORY) **ALWAYS update `story-tracker.md` before/after each story. NEVER proceed with stale tracker state.** @@ -477,3 +599,4 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - [ ] Final test suite passes - [ ] Worktree branch merged to parent and worktree deleted - [ ] Story tracker updated with final status +- [ ] Telegram completion notification attempted if configured diff --git a/skills/implement-plan/codex/SKILL.md b/skills/implement-plan/codex/SKILL.md index d7605cc..bebb82d 100644 --- a/skills/implement-plan/codex/SKILL.md +++ b/skills/implement-plan/codex/SKILL.md @@ -184,6 +184,20 @@ Resolve the shared runtime helper path before writing the command script: REVIEWER_RUNTIME=~/.codex/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Review Payload Write to `/tmp/milestone-${REVIEW_ID}.md`: @@ -209,6 +223,42 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`: [test output with pass/fail counts] ``` +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script: @@ -234,8 +284,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -255,8 +319,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -279,8 +357,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -294,13 +386,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/milestone-review-${REVIEW_ID}.sh \ --stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \ - --status-file /tmp/milestone-review-${REVIEW_ID}.status + --status-file /tmp/milestone-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/milestone-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -310,6 +405,11 @@ jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review- ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/milestone-review-${REVIEW_ID}.runner.out /tmp/milestone-review-${REVIEW_ID}.md +``` Fallback is allowed only when the helper is missing or not executable. @@ -328,17 +428,19 @@ Fallback is allowed only when the helper is missing or not executable. [Reviewer feedback] ``` -4. Check verdict: - - **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve) - - **VERDICT: REVISE** -> go to Step 5 - - No clear verdict but positive / no actionable items -> treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings -> proceed to Phase 4 Step 6 (commit & approve) + - **VERDICT: APPROVED** with only `P3` findings -> optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding -> go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` -> treat as approved - Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters + - Helper state `needs-operator-decision` -> surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters - Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop) #### Step 5: Address Feedback & Re-verify -1. Address each issue the reviewer raised (do NOT commit yet). +1. Address the reviewer findings in priority order (`P0` -> `P1` -> `P2`, then `P3` when practical) (do NOT commit yet). 2. Re-run verification (lint/typecheck/tests) — all must pass. 3. Update `/tmp/milestone-${REVIEW_ID}.md` with new diff and verification output. @@ -367,8 +469,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -390,12 +492,11 @@ $(cat /tmp/milestone-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ - --setting-sources user \ - > /tmp/milestone-review-${REVIEW_ID}.md + --setting-sources user ``` **If `REVIEWER_CLI` is `cursor`:** @@ -413,8 +514,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -476,6 +577,27 @@ Present summary: **Branch:** implement/ (merged and deleted) ``` +### Phase 8: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed Codex skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.codex/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the implement-plan run (fully completed, stopped after max rounds, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "implement-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Quick Reference | Phase | Action | Required Output | @@ -487,6 +609,7 @@ Present summary: | 5 | Milestone review loop (per milestone) | Reviewer approval or max rounds + user override | | 6 | Invoke `superpowers:finishing-a-development-branch` | Branch merged to parent, worktree deleted | | 7 | Final report | Summary presented | +| 8 | Send Telegram completion notification | User notified or notification status reported | ## Tracker Discipline (MANDATORY) @@ -514,6 +637,7 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - Not capturing the Codex session ID for resume in subsequent review rounds. - Forgetting to update `story-tracker.md` between stories. - Creating a new worktree when one already exists for a resumed plan. +- Using any completion notification path other than Telegram. ## Rationalizations and Counters @@ -551,3 +675,4 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - [ ] Final test suite passes - [ ] Worktree branch merged to parent and worktree deleted - [ ] Story tracker updated with final status +- [ ] Telegram completion notification attempted if configured diff --git a/skills/implement-plan/cursor/SKILL.md b/skills/implement-plan/cursor/SKILL.md index 822346d..6b40671 100644 --- a/skills/implement-plan/cursor/SKILL.md +++ b/skills/implement-plan/cursor/SKILL.md @@ -188,6 +188,20 @@ else fi ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Review Payload Write to `/tmp/milestone-${REVIEW_ID}.md`: @@ -213,6 +227,42 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`: [test output with pass/fail counts] ``` +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script: @@ -238,8 +288,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -259,8 +323,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -283,8 +361,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -304,13 +396,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/milestone-review-${REVIEW_ID}.sh \ --stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \ - --status-file /tmp/milestone-review-${REVIEW_ID}.status + --status-file /tmp/milestone-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/milestone-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -320,6 +415,11 @@ jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review- ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/milestone-review-${REVIEW_ID}.runner.out /tmp/milestone-review-${REVIEW_ID}.md +``` Fallback is allowed only when the helper is missing or not executable. @@ -338,17 +438,19 @@ Fallback is allowed only when the helper is missing or not executable. [Reviewer feedback] ``` -4. Check verdict: - - **VERDICT: APPROVED** -> proceed to Phase 4 Step 6 (commit & approve) - - **VERDICT: REVISE** -> go to Step 5 - - No clear verdict but positive / no actionable items -> treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings -> proceed to Phase 4 Step 6 (commit & approve) + - **VERDICT: APPROVED** with only `P3` findings -> optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding -> go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` -> treat as approved - Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters + - Helper state `needs-operator-decision` -> surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters - Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop) #### Step 5: Address Feedback & Re-verify -1. Address each issue the reviewer raised (do NOT commit yet). +1. Address the reviewer findings in priority order (`P0` -> `P1` -> `P2`, then `P3` when practical) (do NOT commit yet). 2. Re-run verification (lint/typecheck/tests) — all must pass. 3. Update `/tmp/milestone-${REVIEW_ID}.md` with new diff and verification output. @@ -377,8 +479,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -400,12 +502,11 @@ $(cat /tmp/milestone-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ - --setting-sources user \ - > /tmp/milestone-review-${REVIEW_ID}.md + --setting-sources user ``` **If `REVIEWER_CLI` is `cursor`:** @@ -423,8 +524,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -486,6 +587,31 @@ Present summary: **Branch:** implement/ (merged and deleted) ``` +### Phase 8: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from Cursor's installed skills directory: + +```bash +if [ -x .cursor/skills/reviewer-runtime/notify-telegram.sh ]; then + TELEGRAM_NOTIFY_RUNTIME=.cursor/skills/reviewer-runtime/notify-telegram.sh +else + TELEGRAM_NOTIFY_RUNTIME=~/.cursor/skills/reviewer-runtime/notify-telegram.sh +fi +``` + +On every terminal outcome for the implement-plan run (fully completed, stopped after max rounds, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "implement-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Quick Reference | Phase | Action | Required Output | @@ -497,6 +623,7 @@ Present summary: | 5 | Milestone review loop (per milestone) | Reviewer approval or max rounds + user override | | 6 | Invoke `superpowers:finishing-a-development-branch` | Branch merged to parent, worktree deleted | | 7 | Final report | Summary presented | +| 8 | Send Telegram completion notification | User notified or notification status reported | ## Tracker Discipline (MANDATORY) @@ -522,6 +649,7 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - Skipping worktree setup and working directly on the main branch. - Forgetting to update `story-tracker.md` between stories. - Creating a new worktree when one already exists for a resumed plan. +- Using any completion notification path other than Telegram. ## Red Flags - Stop and Correct @@ -550,3 +678,4 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - [ ] Final test suite passes - [ ] Worktree branch merged to parent and worktree deleted - [ ] Story tracker updated with final status +- [ ] Telegram completion notification attempted if configured diff --git a/skills/implement-plan/opencode/SKILL.md b/skills/implement-plan/opencode/SKILL.md index b310a81..49f355d 100644 --- a/skills/implement-plan/opencode/SKILL.md +++ b/skills/implement-plan/opencode/SKILL.md @@ -169,6 +169,20 @@ Resolve the shared runtime helper path before writing the command script: REVIEWER_RUNTIME=~/.config/opencode/skills/reviewer-runtime/run-review.sh ``` +Set helper success-artifact args before writing the command script: + +```bash +HELPER_SUCCESS_FILE_ARGS=() +case "$REVIEWER_CLI" in + codex) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.md) + ;; + cursor) + HELPER_SUCCESS_FILE_ARGS+=(--success-file /tmp/milestone-review-${REVIEW_ID}.json) + ;; +esac +``` + #### Step 2: Write Review Payload Write to `/tmp/milestone-${REVIEW_ID}.md`: @@ -194,6 +208,42 @@ Write to `/tmp/milestone-${REVIEW_ID}.md`: [test output with pass/fail counts] ``` +#### Review Contract (Applies to Every Round) + +The reviewer response must use this structure: + +```text +## Summary +... + +## Findings +### P0 +- ... +### P1 +- ... +### P2 +- ... +### P3 +- ... + +## Verdict +VERDICT: APPROVED +``` + +Rules: +- Order findings from `P0` to `P3`. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- Use `- None.` when a severity has no findings. +- `VERDICT: APPROVED` is allowed only when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking. +- The calling agent should still try to fix `P3` findings when they are cheap and safe. + +#### Liveness Contract (Applies While Review Is Running) + +- The shared reviewer runtime emits `state=in-progress note="In progress N"` heartbeats every 60 seconds while the reviewer child is alive. +- The calling agent must keep waiting as long as a fresh `In progress N` heartbeat keeps arriving roughly once per minute. +- Do not abort just because the review is slow, a soft timeout fired, or a `stall-warning` line appears, as long as the `In progress N` heartbeat continues. +- Treat missing heartbeats, `state=failed`, `state=completed-empty-output`, and `state=needs-operator-decision` as escalation signals. + #### Step 3: Submit to Reviewer (Round 1) Write the reviewer invocation to `/tmp/milestone-review-${REVIEW_ID}.sh` as a bash script: @@ -219,8 +269,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." ``` Do not try to capture the Codex session ID yet. When using the helper, extract it from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the command completes (look for `session id: `), then store it as `CODEX_SESSION_ID` for resume in subsequent rounds. @@ -240,8 +304,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ --setting-sources user @@ -264,8 +342,22 @@ Evaluate: 4. Test coverage — Are changes adequately tested? 5. Security — Any security concerns introduced? -Be specific and actionable. If solid, end with exactly: VERDICT: APPROVED -If changes are needed, end with exactly: VERDICT: REVISE" \ + +Return exactly these sections in order: +## Summary +## Findings +### P0 +### P1 +### P2 +### P3 +## Verdict + +Rules: +- Order findings from highest severity to lowest. +- Use `- None.` when a severity has no findings. +- `P0` = total blocker, `P1` = major risk, `P2` = must-fix before approval, `P3` = cosmetic / nice to have. +- End with exactly one verdict line: `VERDICT: APPROVED` or `VERDICT: REVISE` +- `VERDICT: APPROVED` is allowed only when there are no `P0`, `P1`, or `P2` findings. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -279,13 +371,16 @@ if [ -x "$REVIEWER_RUNTIME" ]; then --command-file /tmp/milestone-review-${REVIEW_ID}.sh \ --stdout-file /tmp/milestone-review-${REVIEW_ID}.runner.out \ --stderr-file /tmp/milestone-review-${REVIEW_ID}.stderr \ - --status-file /tmp/milestone-review-${REVIEW_ID}.status + --status-file /tmp/milestone-review-${REVIEW_ID}.status \ + "${HELPER_SUCCESS_FILE_ARGS[@]}" else echo "Warning: reviewer runtime helper not found at $REVIEWER_RUNTIME; falling back to direct synchronous review." >&2 bash /tmp/milestone-review-${REVIEW_ID}.sh >/tmp/milestone-review-${REVIEW_ID}.runner.out 2>/tmp/milestone-review-${REVIEW_ID}.stderr fi ``` +Run the helper in the foreground and watch its live stdout for `state=in-progress` heartbeats. If your agent environment buffers command output until exit, start the helper in the background and poll `/tmp/milestone-review-${REVIEW_ID}.status` separately instead of treating heartbeats as post-hoc-only data. + After the command completes: - If `REVIEWER_CLI=cursor`, extract the final review text: @@ -295,6 +390,11 @@ jq -r '.result' /tmp/milestone-review-${REVIEW_ID}.json > /tmp/milestone-review- ``` - If `REVIEWER_CLI=codex`, extract `CODEX_SESSION_ID` from `/tmp/milestone-review-${REVIEW_ID}.runner.out` after the helper or fallback run. If the review text is only in `.runner.out`, move or copy the actual review body into `/tmp/milestone-review-${REVIEW_ID}.md` before verdict parsing. +- If `REVIEWER_CLI=claude`, promote stdout captured by the helper or fallback runner into the markdown review file: + +```bash +cp /tmp/milestone-review-${REVIEW_ID}.runner.out /tmp/milestone-review-${REVIEW_ID}.md +``` Fallback is allowed only when the helper is missing or not executable. @@ -313,17 +413,19 @@ Fallback is allowed only when the helper is missing or not executable. [Reviewer feedback] ``` -4. Check verdict: - - **VERDICT: APPROVED** -> proceed to Phase 5 Step 6 (commit & approve) - - **VERDICT: REVISE** -> go to Step 5 - - No clear verdict but positive / no actionable items -> treat as approved +4. While the reviewer is still running, keep waiting as long as fresh `state=in-progress note="In progress N"` heartbeats continue to appear roughly once per minute. +5. Check verdict: + - **VERDICT: APPROVED** with no `P0`, `P1`, or `P2` findings -> proceed to Phase 5 Step 6 (commit & approve) + - **VERDICT: APPROVED** with only `P3` findings -> optionally fix the `P3` items if they are cheap and safe, then proceed + - **VERDICT: REVISE** or any `P0`, `P1`, or `P2` finding -> go to Step 5 + - No clear verdict but `P0`, `P1`, and `P2` are all `- None.` -> treat as approved - Helper state `completed-empty-output` -> treat as failed review attempt, surface stderr/status, fix invocation or prompt handling, then retry - - Helper state `needs-operator-decision` -> surface status log, note any `stall-warning` heartbeat lines as non-terminal operator hints, and decide whether to keep waiting, abort, or retry with different helper parameters + - Helper state `needs-operator-decision` -> surface status log and decide whether to extend the timeout, abort, or retry with different helper parameters - Max rounds (`MAX_ROUNDS`) reached -> present to user for manual decision (proceed or stop) #### Step 5: Address Feedback & Re-verify -1. Address each issue the reviewer raised (do NOT commit yet). +1. Address the reviewer findings in priority order (`P0` -> `P1` -> `P2`, then `P3` when practical) (do NOT commit yet). 2. Re-run verification (lint/typecheck/tests) — all must pass. 3. Update `/tmp/milestone-${REVIEW_ID}.md` with new diff and verification output. @@ -352,8 +454,8 @@ codex exec resume ${CODEX_SESSION_ID} \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." ``` If resume fails (session expired), fall back to fresh `codex exec` with context about prior rounds. @@ -375,12 +477,11 @@ $(cat /tmp/milestone-${REVIEW_ID}.md) Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ --model ${REVIEWER_MODEL} \ --strict-mcp-config \ - --setting-sources user \ - > /tmp/milestone-review-${REVIEW_ID}.md + --setting-sources user ``` **If `REVIEWER_CLI` is `cursor`:** @@ -398,8 +499,8 @@ cursor-agent --resume ${CURSOR_SESSION_ID} -p \ Changes made: [List specific changes] -Re-review. If solid, end with: VERDICT: APPROVED -If more changes needed, end with: VERDICT: REVISE" \ +Re-review using the same `## Summary`, `## Findings`, and `## Verdict` structure as before. +Keep findings ordered `P0` to `P3`, use `- None.` when a severity has no findings, and only use `VERDICT: APPROVED` when no `P0`, `P1`, or `P2` findings remain. `P3` findings are non-blocking." \ > /tmp/milestone-review-${REVIEW_ID}.json ``` @@ -461,6 +562,27 @@ Present summary: **Branch:** implement/ (merged and deleted) ``` +### Phase 9: Telegram Completion Notification (MANDATORY) + +Resolve the Telegram notifier helper from the installed OpenCode skills directory: + +```bash +TELEGRAM_NOTIFY_RUNTIME=~/.config/opencode/skills/reviewer-runtime/notify-telegram.sh +``` + +On every terminal outcome for the implement-plan run (fully completed, stopped after max rounds, skipped reviewer, or failure), send a Telegram summary if the helper exists and both `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID` are configured: + +```bash +if [ -x "$TELEGRAM_NOTIFY_RUNTIME" ] && [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ]; then + "$TELEGRAM_NOTIFY_RUNTIME" --message "implement-plan completed for : " +fi +``` + +Rules: +- Telegram is the only supported completion notification path. Do not use desktop notifications, `say`, email, or any other notifier. +- Notification failures are non-blocking, but they must be surfaced to the user. +- If Telegram is not configured, state that no completion notification was sent. + ## Tracker Discipline (MANDATORY) Before starting any story: @@ -493,3 +615,4 @@ Note: Commit hashes are backfilled into story Notes after the milestone commit ( - [ ] Final test suite passes - [ ] Worktree branch merged to parent and worktree deleted - [ ] Story tracker updated with final status +- [ ] Telegram completion notification attempted if configured diff --git a/skills/reviewer-runtime/notify-telegram.sh b/skills/reviewer-runtime/notify-telegram.sh new file mode 100755 index 0000000..814524b --- /dev/null +++ b/skills/reviewer-runtime/notify-telegram.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +set -euo pipefail + +DEFAULT_API_BASE_URL="https://api.telegram.org" +DEFAULT_PARSE_MODE="HTML" +MAX_MESSAGE_LENGTH=4096 + +BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-} +CHAT_ID=${TELEGRAM_CHAT_ID:-} +API_BASE_URL=${TELEGRAM_API_BASE_URL:-$DEFAULT_API_BASE_URL} +PARSE_MODE=${TELEGRAM_PARSE_MODE:-$DEFAULT_PARSE_MODE} +MESSAGE="" +MESSAGE_FILE="" + +usage() { + cat <<'EOF' +Usage: + notify-telegram.sh --message [--bot-token ] [--chat-id ] [--api-base-url ] + notify-telegram.sh --message-file [--bot-token ] [--chat-id ] [--api-base-url ] + +Environment fallbacks: + TELEGRAM_BOT_TOKEN + TELEGRAM_CHAT_ID + TELEGRAM_API_BASE_URL + TELEGRAM_PARSE_MODE +EOF +} + +fail_usage() { + echo "Error: $*" >&2 + usage >&2 + exit 2 +} + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --bot-token) + BOT_TOKEN=${2:-} + shift 2 + ;; + --chat-id) + CHAT_ID=${2:-} + shift 2 + ;; + --api-base-url) + API_BASE_URL=${2:-} + shift 2 + ;; + --message) + MESSAGE=${2:-} + shift 2 + ;; + --message-file) + MESSAGE_FILE=${2:-} + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + fail_usage "unknown argument: $1" + ;; + esac + done + + if [[ -n "$MESSAGE" && -n "$MESSAGE_FILE" ]]; then + fail_usage "use either --message or --message-file, not both" + fi + + if [[ -n "$MESSAGE_FILE" ]]; then + [[ -r "$MESSAGE_FILE" ]] || fail_usage "message file is not readable: $MESSAGE_FILE" + MESSAGE=$(<"$MESSAGE_FILE") + fi + + [[ -n "$MESSAGE" ]] || fail_usage "message is required" + [[ -n "$BOT_TOKEN" ]] || fail_usage "bot token is required (use --bot-token or TELEGRAM_BOT_TOKEN)" + [[ -n "$CHAT_ID" ]] || fail_usage "chat id is required (use --chat-id or TELEGRAM_CHAT_ID)" + command -v curl >/dev/null 2>&1 || fail_usage "curl is required" + + if [[ ${#MESSAGE} -gt "$MAX_MESSAGE_LENGTH" ]]; then + MESSAGE=${MESSAGE:0:$MAX_MESSAGE_LENGTH} + fi +} + +main() { + parse_args "$@" + + curl -fsS -X POST \ + "${API_BASE_URL%/}/bot${BOT_TOKEN}/sendMessage" \ + --data-urlencode "chat_id=${CHAT_ID}" \ + --data-urlencode "text=${MESSAGE}" \ + --data-urlencode "parse_mode=${PARSE_MODE}" \ + --data-urlencode "disable_web_page_preview=true" \ + >/dev/null +} + +main "$@" diff --git a/skills/reviewer-runtime/run-review.sh b/skills/reviewer-runtime/run-review.sh index 55eb202..bc6e3a1 100755 --- a/skills/reviewer-runtime/run-review.sh +++ b/skills/reviewer-runtime/run-review.sh @@ -2,6 +2,7 @@ set -euo pipefail DEFAULT_POLL_SECONDS=10 +DEFAULT_HEARTBEAT_SECONDS=60 DEFAULT_SOFT_TIMEOUT_SECONDS=600 DEFAULT_STALL_WARNING_SECONDS=300 DEFAULT_HARD_TIMEOUT_SECONDS=1800 @@ -12,7 +13,9 @@ COMMAND_FILE="" STDOUT_FILE="" STDERR_FILE="" STATUS_FILE="" +SUCCESS_FILES=() POLL_SECONDS=$DEFAULT_POLL_SECONDS +HEARTBEAT_SECONDS=$DEFAULT_HEARTBEAT_SECONDS SOFT_TIMEOUT_SECONDS=$DEFAULT_SOFT_TIMEOUT_SECONDS STALL_WARNING_SECONDS=$DEFAULT_STALL_WARNING_SECONDS HARD_TIMEOUT_SECONDS=$DEFAULT_HARD_TIMEOUT_SECONDS @@ -29,7 +32,9 @@ Usage: --stdout-file \ --stderr-file \ --status-file \ + [--success-file ] \ [--poll-seconds ] \ + [--heartbeat-seconds ] \ [--soft-timeout-seconds ] \ [--stall-warning-seconds ] \ [--hard-timeout-seconds ] @@ -55,6 +60,25 @@ escape_note() { printf '%s' "$note" } +join_success_files() { + if [[ ${#SUCCESS_FILES[@]} -eq 0 ]]; then + printf '' + return 0 + fi + + local joined="" + local path + + for path in "${SUCCESS_FILES[@]}"; do + if [[ -n "$joined" ]]; then + joined+=", " + fi + joined+="$path" + done + + printf '%s' "$joined" +} + iso_timestamp() { date -u +"%Y-%m-%dT%H:%M:%SZ" } @@ -147,10 +171,18 @@ parse_args() { STATUS_FILE=${2:-} shift 2 ;; + --success-file) + SUCCESS_FILES+=("${2:-}") + shift 2 + ;; --poll-seconds) POLL_SECONDS=${2:-} shift 2 ;; + --heartbeat-seconds) + HEARTBEAT_SECONDS=${2:-} + shift 2 + ;; --soft-timeout-seconds) SOFT_TIMEOUT_SECONDS=${2:-} shift 2 @@ -179,11 +211,13 @@ parse_args() { [[ -n "$STATUS_FILE" ]] || fail_usage "--status-file is required" require_integer "poll-seconds" "$POLL_SECONDS" + require_integer "heartbeat-seconds" "$HEARTBEAT_SECONDS" require_integer "soft-timeout-seconds" "$SOFT_TIMEOUT_SECONDS" require_integer "stall-warning-seconds" "$STALL_WARNING_SECONDS" require_integer "hard-timeout-seconds" "$HARD_TIMEOUT_SECONDS" [[ "$POLL_SECONDS" -gt 0 ]] || fail_usage "poll-seconds must be > 0" + [[ "$HEARTBEAT_SECONDS" -gt 0 ]] || fail_usage "heartbeat-seconds must be > 0" [[ "$SOFT_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "soft-timeout-seconds must be > 0" [[ "$STALL_WARNING_SECONDS" -gt 0 ]] || fail_usage "stall-warning-seconds must be > 0" [[ "$HARD_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "hard-timeout-seconds must be > 0" @@ -227,8 +261,10 @@ main() { local last_stdout_bytes=0 local last_stderr_bytes=0 local last_output_change_time=$START_TIME + local last_heartbeat_time=$START_TIME local soft_timeout_logged=0 local stall_warning_logged=0 + local heartbeat_count=0 while kill -0 "$CHILD_PID" 2>/dev/null; do sleep "$POLL_SECONDS" @@ -239,6 +275,12 @@ main() { stdout_bytes=$(file_bytes "$STDOUT_FILE") stderr_bytes=$(file_bytes "$STDERR_FILE") + if [[ $((now - last_heartbeat_time)) -ge "$HEARTBEAT_SECONDS" ]]; then + heartbeat_count=$((heartbeat_count + 1)) + append_status info in-progress "In progress ${heartbeat_count}" + last_heartbeat_time=$now + fi + if [[ "$stdout_bytes" -ne "$last_stdout_bytes" || "$stderr_bytes" -ne "$last_stderr_bytes" ]]; then last_output_change_time=$now stall_warning_logged=0 @@ -285,6 +327,7 @@ main() { trap - EXIT local final_stdout_bytes final_stderr_bytes + local success_file success_bytes final_stdout_bytes=$(file_bytes "$STDOUT_FILE") final_stderr_bytes=$(file_bytes "$STDERR_FILE") @@ -294,6 +337,16 @@ main() { exit 0 fi + if [[ ${#SUCCESS_FILES[@]} -gt 0 ]]; then + for success_file in "${SUCCESS_FILES[@]}"; do + success_bytes=$(file_bytes "$success_file") + if [[ "$success_bytes" -gt 0 ]]; then + append_status info completed "reviewer completed successfully via success file $(join_success_files)" + exit 0 + fi + done + fi + append_status error completed-empty-output "reviewer exited successfully with empty stdout" exit "$EXIT_COMPLETED_EMPTY_OUTPUT" fi diff --git a/skills/reviewer-runtime/tests/claude-review-template-guard.sh b/skills/reviewer-runtime/tests/claude-review-template-guard.sh index 2000c86..c65c1fa 100755 --- a/skills/reviewer-runtime/tests/claude-review-template-guard.sh +++ b/skills/reviewer-runtime/tests/claude-review-template-guard.sh @@ -34,6 +34,9 @@ check_skill_file() { assert_contains "$file" '$(cat /tmp/' assert_contains "$file" "--strict-mcp-config" assert_contains "$file" "--setting-sources user" + assert_contains "$file" "### P0" + assert_contains "$file" "In progress N" + assert_contains "$file" "notify-telegram.sh" assert_not_contains "$file" "--allowedTools Read" } diff --git a/skills/reviewer-runtime/tests/smoke-test.sh b/skills/reviewer-runtime/tests/smoke-test.sh old mode 100644 new mode 100755 index bfb86a7..5597396 --- a/skills/reviewer-runtime/tests/smoke-test.sh +++ b/skills/reviewer-runtime/tests/smoke-test.sh @@ -66,6 +66,28 @@ run_helper() { return "$exit_code" } +run_helper_allowing_success_file() { + local command_file=$1 + local stdout_file=$2 + local stderr_file=$3 + local status_file=$4 + local success_file=$5 + shift 5 + + set +e + "$HELPER_PATH" \ + --command-file "$command_file" \ + --stdout-file "$stdout_file" \ + --stderr-file "$stderr_file" \ + --status-file "$status_file" \ + --success-file "$success_file" \ + "$@" + local exit_code=$? + set -e + + return "$exit_code" +} + test_delayed_success() { local dir=$1 local command_file=$dir/delayed-success.sh @@ -120,6 +142,36 @@ printf "completed after soft timeout\n" assert_file_contains "$status_file" "state=completed" } +test_in_progress_heartbeats() { + local dir=$1 + local command_file=$dir/in-progress-heartbeats.sh + local stdout_file=$dir/in-progress-heartbeats.stdout + local stderr_file=$dir/in-progress-heartbeats.stderr + local status_file=$dir/in-progress-heartbeats.status + + make_command "$command_file" ' +sleep 3 +printf "finished with heartbeat coverage\n" +' + + if run_helper "$command_file" "$stdout_file" "$stderr_file" "$status_file" \ + --poll-seconds 1 \ + --heartbeat-seconds 1 \ + --soft-timeout-seconds 5 \ + --stall-warning-seconds 4 \ + --hard-timeout-seconds 10; then + local exit_code=0 + else + local exit_code=$? + fi + + assert_exit_code "$exit_code" 0 + assert_file_contains "$stdout_file" "finished with heartbeat coverage" + assert_file_contains "$status_file" "state=in-progress" + assert_file_contains "$status_file" "In progress 1" + assert_file_contains "$status_file" "In progress 2" +} + test_nonzero_failure() { local dir=$1 local command_file=$dir/nonzero-failure.sh @@ -173,6 +225,34 @@ exit 0 assert_file_contains "$status_file" "state=completed-empty-output" } +test_success_file_allows_empty_stdout() { + local dir=$1 + local command_file=$dir/success-file.sh + local stdout_file=$dir/success-file.stdout + local stderr_file=$dir/success-file.stderr + local status_file=$dir/success-file.status + local success_file=$dir/review-output.md + + make_command "$command_file" " +printf 'review body from redirected file\\n' > \"$success_file\" +exit 0 +" + + if run_helper_allowing_success_file "$command_file" "$stdout_file" "$stderr_file" "$status_file" "$success_file" \ + --poll-seconds 1 \ + --soft-timeout-seconds 5 \ + --stall-warning-seconds 3 \ + --hard-timeout-seconds 10; then + local exit_code=0 + else + local exit_code=$? + fi + + assert_exit_code "$exit_code" 0 + assert_file_contains "$success_file" "review body from redirected file" + assert_file_contains "$status_file" "state=completed" +} + test_signal_cleanup() { local dir=$1 local command_file=$dir/signal-child.sh @@ -252,8 +332,10 @@ main() { test_delayed_success "$tmp_dir" test_soft_timeout_continues "$tmp_dir" + test_in_progress_heartbeats "$tmp_dir" test_nonzero_failure "$tmp_dir" test_empty_output_is_terminal "$tmp_dir" + test_success_file_allows_empty_stdout "$tmp_dir" test_signal_cleanup "$tmp_dir" test_hard_timeout_escalation "$tmp_dir" diff --git a/skills/reviewer-runtime/tests/telegram-notify-test.sh b/skills/reviewer-runtime/tests/telegram-notify-test.sh new file mode 100755 index 0000000..d48341d --- /dev/null +++ b/skills/reviewer-runtime/tests/telegram-notify-test.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +HELPER_PATH=$(cd "$SCRIPT_DIR/.." && pwd)/notify-telegram.sh + +fail() { + echo "FAIL: $*" >&2 + exit 1 +} + +assert_equals() { + local actual=$1 + local expected=$2 + if [[ "$actual" != "$expected" ]]; then + fail "expected '$expected', got '$actual'" + fi +} + +assert_file_contains() { + local file=$1 + local pattern=$2 + if ! grep -qF -- "$pattern" "$file"; then + echo "--- $file ---" >&2 + sed -n '1,200p' "$file" >&2 || true + fail "expected '$pattern' in $file" + fi +} + +capture_curl() { + local bin_dir=$1 + local curl_args_file=$2 + + mkdir -p "$bin_dir" + cat >"$bin_dir/curl" <"$curl_args_file" +printf '{"ok":true}\n' +EOF + chmod +x "$bin_dir/curl" +} + +read_curl_value() { + local file=$1 + local key=$2 + + awk -v prefix="${key}=" 'index($0, prefix) == 1 { print substr($0, length(prefix) + 1); exit }' "$file" +} + +test_missing_credentials() { + local tmp_dir=$1 + local output_file=$tmp_dir/missing-credentials.out + + set +e + env -u TELEGRAM_BOT_TOKEN -u TELEGRAM_CHAT_ID \ + "$HELPER_PATH" --message "hello" >"$output_file" 2>&1 + local exit_code=$? + set -e + + [[ "$exit_code" -eq 2 ]] || fail "expected exit code 2 for missing credentials, got $exit_code" + assert_file_contains "$output_file" "bot token is required" +} + +test_rejects_message_and_message_file_together() { + local tmp_dir=$1 + local message_file=$tmp_dir/message.txt + local output_file=$tmp_dir/message-and-file.out + + printf 'hello from file\n' >"$message_file" + + set +e + TELEGRAM_BOT_TOKEN=test-token \ + TELEGRAM_CHAT_ID=123456 \ + "$HELPER_PATH" --message "hello" --message-file "$message_file" >"$output_file" 2>&1 + local exit_code=$? + set -e + + [[ "$exit_code" -eq 2 ]] || fail "expected exit code 2 for mutually exclusive arguments, got $exit_code" + assert_file_contains "$output_file" "use either --message or --message-file, not both" +} + +test_successful_request() { + local tmp_dir=$1 + local bin_dir=$tmp_dir/bin + local curl_args_file=$tmp_dir/curl-args.txt + capture_curl "$bin_dir" "$curl_args_file" + + PATH="$bin_dir:$PATH" \ + TELEGRAM_BOT_TOKEN=test-token \ + TELEGRAM_CHAT_ID=123456 \ + "$HELPER_PATH" --message "Plan completed" + + assert_file_contains "$curl_args_file" "https://api.telegram.org/bottest-token/sendMessage" + assert_file_contains "$curl_args_file" "chat_id=123456" + assert_file_contains "$curl_args_file" "text=Plan completed" + assert_file_contains "$curl_args_file" "disable_web_page_preview=true" + assert_file_contains "$curl_args_file" "parse_mode=HTML" +} + +test_message_file_and_custom_api_base() { + local tmp_dir=$1 + local bin_dir=$tmp_dir/bin-message-file + local curl_args_file=$tmp_dir/curl-message-file.txt + local message_file=$tmp_dir/telegram-message.txt + capture_curl "$bin_dir" "$curl_args_file" + + printf 'Plan completed from file\n' >"$message_file" + + PATH="$bin_dir:$PATH" \ + TELEGRAM_BOT_TOKEN=test-token \ + TELEGRAM_CHAT_ID=654321 \ + "$HELPER_PATH" \ + --message-file "$message_file" \ + --api-base-url "https://telegram.example.test/custom" + + assert_file_contains "$curl_args_file" "https://telegram.example.test/custom/bottest-token/sendMessage" + assert_file_contains "$curl_args_file" "chat_id=654321" + assert_file_contains "$curl_args_file" "text=Plan completed from file" +} + +test_truncates_long_message() { + local tmp_dir=$1 + local bin_dir=$tmp_dir/bin-truncate + local curl_args_file=$tmp_dir/curl-truncate.txt + local long_message_file=$tmp_dir/long-message.txt + local truncated_message + capture_curl "$bin_dir" "$curl_args_file" + + python3 - <<'PY' >"$long_message_file" +print("A" * 5000, end="") +PY + + PATH="$bin_dir:$PATH" \ + TELEGRAM_BOT_TOKEN=test-token \ + TELEGRAM_CHAT_ID=123456 \ + "$HELPER_PATH" --message-file "$long_message_file" + + truncated_message=$(read_curl_value "$curl_args_file" "text") + assert_equals "${#truncated_message}" "4096" +} + +main() { + [[ -x "$HELPER_PATH" ]] || fail "helper is not executable: $HELPER_PATH" + + local tmp_dir + tmp_dir=$(mktemp -d) + trap "rm -rf '$tmp_dir'" EXIT + + test_missing_credentials "$tmp_dir" + test_rejects_message_and_message_file_together "$tmp_dir" + test_successful_request "$tmp_dir" + test_message_file_and_custom_api_base "$tmp_dir" + test_truncates_long_message "$tmp_dir" + + echo "PASS: telegram notifier tests" +} + +main "$@"