#!/usr/bin/env bash
#
# run-review.sh - run a reviewer command file as a supervised background child.
#
# The command file is executed with bash, its stdout/stderr are redirected to
# the given files, and a key=value status line is appended to the status file
# (and echoed to stdout) once per poll interval until the child exits or the
# hard timeout is reached.
#
# Exit codes:
#   0    child exited 0 and produced stdout (or a non-empty success file)
#   2    usage error
#   80   child exited 0 but stdout and every success file were empty
#   81   hard timeout reached; the child was terminated
#   130  SIGINT or SIGTERM was received by this script
#   *    any other value is the child's own non-zero exit code
set -euo pipefail

readonly DEFAULT_POLL_SECONDS=10
readonly DEFAULT_HEARTBEAT_SECONDS=60
readonly DEFAULT_SOFT_TIMEOUT_SECONDS=600
readonly DEFAULT_STALL_WARNING_SECONDS=300
readonly DEFAULT_HARD_TIMEOUT_SECONDS=1800
readonly EXIT_COMPLETED_EMPTY_OUTPUT=80
readonly EXIT_NEEDS_OPERATOR_DECISION=81

COMMAND_FILE=""
STDOUT_FILE=""
STDERR_FILE=""
STATUS_FILE=""
SUCCESS_FILES=()
POLL_SECONDS=$DEFAULT_POLL_SECONDS
HEARTBEAT_SECONDS=$DEFAULT_HEARTBEAT_SECONDS
SOFT_TIMEOUT_SECONDS=$DEFAULT_SOFT_TIMEOUT_SECONDS
STALL_WARNING_SECONDS=$DEFAULT_STALL_WARNING_SECONDS
HARD_TIMEOUT_SECONDS=$DEFAULT_HARD_TIMEOUT_SECONDS
CHILD_PID=""
USE_GROUP_KILL=0
INTERRUPTED=0

#######################################
# Print the command-line synopsis to stdout.
#######################################
usage() {
  cat <<'EOF'
Usage: run-review.sh \
  --command-file <path> \
  --stdout-file <path> \
  --stderr-file <path> \
  --status-file <path> \
  [--success-file <path>] \
  [--poll-seconds <seconds>] \
  [--heartbeat-seconds <seconds>] \
  [--soft-timeout-seconds <seconds>] \
  [--stall-warning-seconds <seconds>] \
  [--hard-timeout-seconds <seconds>]
EOF
}

#######################################
# Report a usage error on stderr, print the synopsis, and exit with status 2.
# Arguments: $* - error message
#######################################
fail_usage() {
  printf 'Error: %s\n' "$*" >&2
  usage >&2
  exit 2
}

#######################################
# Abort with a usage error unless the value consists solely of decimal digits.
# Arguments: $1 - option name used in the error message
#            $2 - value to validate
#######################################
require_integer() {
  local name=$1
  local value=$2
  [[ "$value" =~ ^[0-9]+$ ]] || fail_usage "$name must be an integer"
}

#######################################
# Abort with a usage error when an option is the last word on the command
# line. Without this check the following `shift 2` fails and `set -e` ends
# the script with no diagnostic at all.
# Arguments: $1 - option name
#            $2 - number of positional parameters remaining (option included)
#######################################
require_value() {
  local option=$1
  local remaining=$2
  [[ "$remaining" -ge 2 ]] || fail_usage "$option requires a value"
}

#######################################
# Make a note safe for the single-line note="..." status field: newlines
# become spaces and double quotes become single quotes.
# Arguments: $1 - raw note
# Outputs:   sanitized note on stdout (no trailing newline)
#######################################
escape_note() {
  local note=$1
  note=${note//$'\n'/ }
  note=${note//\"/\'}
  printf '%s' "$note"
}

#######################################
# Join SUCCESS_FILES with ", ".
# Globals:   SUCCESS_FILES (read)
# Outputs:   joined list on stdout; nothing when the array is empty
#######################################
join_success_files() {
  # Guard the expansion: an empty array under `set -u` is an error on older
  # bash releases.
  if [[ ${#SUCCESS_FILES[@]} -eq 0 ]]; then
    printf ''
    return 0
  fi

  local joined=""
  local path
  for path in "${SUCCESS_FILES[@]}"; do
    if [[ -n "$joined" ]]; then
      joined+=", "
    fi
    joined+="$path"
  done
  printf '%s' "$joined"
}

#######################################
# Print the current UTC time as YYYY-MM-DDTHH:MM:SSZ.
#######################################
iso_timestamp() {
  date -u +"%Y-%m-%dT%H:%M:%SZ"
}

#######################################
# Print the whole seconds elapsed since START_TIME.
# Globals:   START_TIME (read; set by main before the first status line)
#######################################
elapsed_seconds() {
  local now
  now=$(date +%s)
  printf '%s' $((now - START_TIME))
}

#######################################
# Print the size of a regular file in bytes, or 0 when it is not a regular
# file (including when it does not exist).
# Arguments: $1 - path
#######################################
file_bytes() {
  local path=$1
  if [[ -f "$path" ]]; then
    # Some wc implementations pad the count with blanks; strip them.
    wc -c <"$path" | tr -d '[:space:]'
  else
    printf '0'
  fi
}

#######################################
# Emit one key=value status line to stdout and append it to STATUS_FILE.
# Globals:   CHILD_PID, STDOUT_FILE, STDERR_FILE, STATUS_FILE (read)
# Arguments: $1 - level (e.g. info, warn, error)
#            $2 - state token
#            $3 - free-form note
#######################################
append_status() {
  local level=$1
  local state=$2
  local note=$3
  local elapsed pid stdout_bytes stderr_bytes line

  elapsed=$(elapsed_seconds)
  pid=${CHILD_PID:-0} # 0 means no child has been launched yet
  stdout_bytes=$(file_bytes "$STDOUT_FILE")
  stderr_bytes=$(file_bytes "$STDERR_FILE")
  line="ts=$(iso_timestamp) level=$level state=$state elapsed_s=$elapsed pid=$pid stdout_bytes=$stdout_bytes stderr_bytes=$stderr_bytes note=\"$(escape_note "$note")\""
  printf '%s\n' "$line" | tee -a "$STATUS_FILE"
}

#######################################
# Create the directory that will contain the given path.
# Arguments: $1 - file path
#######################################
ensure_parent_dir() {
  local path=$1
  mkdir -p "$(dirname "$path")"
}

#######################################
# Send one signal to the child. With USE_GROUP_KILL=1 the negative PID form
# (process group) is tried first, then the plain PID. Delivery failures are
# ignored on purpose: the target may already be gone.
# Globals:   CHILD_PID, USE_GROUP_KILL (read)
# Arguments: $1 - signal name without the SIG prefix (TERM, KILL)
#######################################
signal_child() {
  local sig=$1
  if [[ "$USE_GROUP_KILL" -eq 1 ]]; then
    kill "-$sig" -- "-$CHILD_PID" 2>/dev/null \
      || kill "-$sig" "$CHILD_PID" 2>/dev/null \
      || true
  else
    kill "-$sig" "$CHILD_PID" 2>/dev/null || true
  fi
}

#######################################
# Stop the child: TERM first, then KILL if it is still alive one second
# later. Does nothing when no child was launched or it has already exited.
# Globals:   CHILD_PID (read)
#######################################
kill_child_process_group() {
  if [[ -z "$CHILD_PID" ]]; then
    return 0
  fi
  if ! kill -0 "$CHILD_PID" 2>/dev/null; then
    return 0
  fi

  signal_child TERM
  sleep 1
  if kill -0 "$CHILD_PID" 2>/dev/null; then
    signal_child KILL
  fi
}

#######################################
# Trap handler for INT/TERM: record the interruption, stop the child, and
# exit 130 (the same code is used for both signals).
# Globals:   INTERRUPTED (written)
# Arguments: $1 - signal name without the SIG prefix
#######################################
handle_signal() {
  local signal_name=$1
  # Tells the EXIT trap that the child has already been dealt with.
  INTERRUPTED=1
  append_status error failed "received SIG${signal_name}; terminating reviewer child"
  kill_child_process_group
  exit 130
}

#######################################
# Parse and validate the command line into the option globals.
# Globals:   COMMAND_FILE, STDOUT_FILE, STDERR_FILE, STATUS_FILE,
#            SUCCESS_FILES, POLL_SECONDS, HEARTBEAT_SECONDS,
#            SOFT_TIMEOUT_SECONDS, STALL_WARNING_SECONDS,
#            HARD_TIMEOUT_SECONDS (written)
# Arguments: the script's positional parameters
# Returns:   exits 2 via fail_usage on any invalid input; exits 0 on --help
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --command-file)
        require_value "$1" "$#"
        COMMAND_FILE=$2
        shift 2
        ;;
      --stdout-file)
        require_value "$1" "$#"
        STDOUT_FILE=$2
        shift 2
        ;;
      --stderr-file)
        require_value "$1" "$#"
        STDERR_FILE=$2
        shift 2
        ;;
      --status-file)
        require_value "$1" "$#"
        STATUS_FILE=$2
        shift 2
        ;;
      --success-file)
        require_value "$1" "$#"
        SUCCESS_FILES+=("$2")
        shift 2
        ;;
      --poll-seconds)
        require_value "$1" "$#"
        POLL_SECONDS=$2
        shift 2
        ;;
      --heartbeat-seconds)
        require_value "$1" "$#"
        HEARTBEAT_SECONDS=$2
        shift 2
        ;;
      --soft-timeout-seconds)
        require_value "$1" "$#"
        SOFT_TIMEOUT_SECONDS=$2
        shift 2
        ;;
      --stall-warning-seconds)
        require_value "$1" "$#"
        STALL_WARNING_SECONDS=$2
        shift 2
        ;;
      --hard-timeout-seconds)
        require_value "$1" "$#"
        HARD_TIMEOUT_SECONDS=$2
        shift 2
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        fail_usage "unknown argument: $1"
        ;;
    esac
  done

  [[ -n "$COMMAND_FILE" ]] || fail_usage "--command-file is required"
  [[ -n "$STDOUT_FILE" ]] || fail_usage "--stdout-file is required"
  [[ -n "$STDERR_FILE" ]] || fail_usage "--stderr-file is required"
  [[ -n "$STATUS_FILE" ]] || fail_usage "--status-file is required"

  require_integer "poll-seconds" "$POLL_SECONDS"
  require_integer "heartbeat-seconds" "$HEARTBEAT_SECONDS"
  require_integer "soft-timeout-seconds" "$SOFT_TIMEOUT_SECONDS"
  require_integer "stall-warning-seconds" "$STALL_WARNING_SECONDS"
  require_integer "hard-timeout-seconds" "$HARD_TIMEOUT_SECONDS"

  # Force base 10. A leading zero would otherwise make bash read the value as
  # octal in the comparisons below ("010" -> 8, "08" -> arithmetic error),
  # disagreeing with how `sleep` reads the same string.
  local option_var
  for option_var in POLL_SECONDS HEARTBEAT_SECONDS SOFT_TIMEOUT_SECONDS \
    STALL_WARNING_SECONDS HARD_TIMEOUT_SECONDS; do
    printf -v "$option_var" '%d' "$((10#${!option_var}))"
  done

  [[ "$POLL_SECONDS" -gt 0 ]] || fail_usage "poll-seconds must be > 0"
  [[ "$HEARTBEAT_SECONDS" -gt 0 ]] || fail_usage "heartbeat-seconds must be > 0"
  [[ "$SOFT_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "soft-timeout-seconds must be > 0"
  [[ "$STALL_WARNING_SECONDS" -gt 0 ]] || fail_usage "stall-warning-seconds must be > 0"
  [[ "$HARD_TIMEOUT_SECONDS" -gt 0 ]] || fail_usage "hard-timeout-seconds must be > 0"
  [[ "$SOFT_TIMEOUT_SECONDS" -le "$HARD_TIMEOUT_SECONDS" ]] \
    || fail_usage "soft-timeout-seconds must be <= hard-timeout-seconds"
  [[ "$STALL_WARNING_SECONDS" -le "$HARD_TIMEOUT_SECONDS" ]] \
    || fail_usage "stall-warning-seconds must be <= hard-timeout-seconds"
  [[ -r "$COMMAND_FILE" ]] || fail_usage "command file is not readable: $COMMAND_FILE"
}

#######################################
# Start the command file under bash in the background with stdout/stderr
# redirected to STDOUT_FILE/STDERR_FILE. When `setsid` is available the child
# is started through it and is later signalled by process group.
# Globals:   COMMAND_FILE, STDOUT_FILE, STDERR_FILE (read)
#            CHILD_PID, USE_GROUP_KILL (written)
#######################################
launch_child() {
  if command -v setsid >/dev/null 2>&1; then
    setsid bash "$COMMAND_FILE" >"$STDOUT_FILE" 2>"$STDERR_FILE" &
    USE_GROUP_KILL=1
  else
    bash "$COMMAND_FILE" >"$STDOUT_FILE" 2>"$STDERR_FILE" &
    USE_GROUP_KILL=0
  fi
  CHILD_PID=$!
}

#######################################
# Entry point: parse options, launch the child, poll it until it exits or
# the hard timeout is reached, then map the outcome to an exit code.
# Globals:   START_TIME (written, exported)
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"

  ensure_parent_dir "$STDOUT_FILE"
  ensure_parent_dir "$STDERR_FILE"
  ensure_parent_dir "$STATUS_FILE"

  # Start every run with empty output and status files.
  : >"$STDOUT_FILE"
  : >"$STDERR_FILE"
  : >"$STATUS_FILE"

  START_TIME=$(date +%s)
  export START_TIME

  trap 'handle_signal INT' INT
  trap 'handle_signal TERM' TERM
  # Safety net for unexpected exits (e.g. set -e): do not leave the child
  # running. Skipped when handle_signal has already stopped it.
  trap 'if [[ "$INTERRUPTED" -eq 0 ]]; then kill_child_process_group; fi' EXIT

  launch_child
  append_status info running-silent "reviewer child launched"

  local last_stdout_bytes=0
  local last_stderr_bytes=0
  local last_output_change_time=$START_TIME
  local last_heartbeat_time=$START_TIME
  local soft_timeout_logged=0
  local heartbeat_count=0
  local now elapsed stdout_bytes stderr_bytes note state level silent_for

  while kill -0 "$CHILD_PID" 2>/dev/null; do
    sleep "$POLL_SECONDS"

    now=$(date +%s)
    elapsed=$((now - START_TIME))
    stdout_bytes=$(file_bytes "$STDOUT_FILE")
    stderr_bytes=$(file_bytes "$STDERR_FILE")

    if [[ $((now - last_heartbeat_time)) -ge "$HEARTBEAT_SECONDS" ]]; then
      heartbeat_count=$((heartbeat_count + 1))
      append_status info in-progress "In progress ${heartbeat_count}"
      last_heartbeat_time=$now
    fi

    # Classify this poll: output grew, silent past the stall threshold, or
    # merely silent.
    if [[ "$stdout_bytes" -ne "$last_stdout_bytes" || "$stderr_bytes" -ne "$last_stderr_bytes" ]]; then
      last_output_change_time=$now
      state=running-active
      level=info
      note="reviewer output changed"
    else
      silent_for=$((now - last_output_change_time))
      if [[ "$silent_for" -ge "$STALL_WARNING_SECONDS" ]]; then
        state=stall-warning
        level=warn
        note="no output growth for ${silent_for}s; process still alive"
      else
        state=running-silent
        level=info
        note="reviewer process alive; waiting for output"
      fi
    fi

    # The soft timeout is advisory: it is mentioned once and never stops the
    # child.
    if [[ "$elapsed" -ge "$SOFT_TIMEOUT_SECONDS" && "$soft_timeout_logged" -eq 0 ]]; then
      note="$note; soft timeout reached, continuing while reviewer is alive"
      soft_timeout_logged=1
    fi

    append_status "$level" "$state" "$note"
    last_stdout_bytes=$stdout_bytes
    last_stderr_bytes=$stderr_bytes

    if [[ "$elapsed" -ge "$HARD_TIMEOUT_SECONDS" ]]; then
      append_status error needs-operator-decision "hard timeout reached; terminating reviewer child for operator intervention"
      kill_child_process_group
      trap - EXIT
      exit "$EXIT_NEEDS_OPERATOR_DECISION"
    fi
  done

  # Collect the child's status without letting `set -e` abort on non-zero.
  local child_exit_code=0
  wait "$CHILD_PID" || child_exit_code=$?
  trap - EXIT

  local final_stdout_bytes
  local success_file success_bytes
  final_stdout_bytes=$(file_bytes "$STDOUT_FILE")

  if [[ "$child_exit_code" -eq 0 ]]; then
    if [[ "$final_stdout_bytes" -gt 0 ]]; then
      append_status info completed "reviewer completed successfully"
      exit 0
    fi

    # Empty stdout still counts as success when any success file has content.
    if [[ ${#SUCCESS_FILES[@]} -gt 0 ]]; then
      for success_file in "${SUCCESS_FILES[@]}"; do
        success_bytes=$(file_bytes "$success_file")
        if [[ "$success_bytes" -gt 0 ]]; then
          append_status info completed "reviewer completed successfully via success file $(join_success_files)"
          exit 0
        fi
      done
    fi

    append_status error completed-empty-output "reviewer exited successfully with empty stdout"
    exit "$EXIT_COMPLETED_EMPTY_OUTPUT"
  fi

  append_status error failed "reviewer exited with code $child_exit_code"
  exit "$child_exit_code"
}

main "$@"