diff --git a/.loopx/apply-adr/apply-feedback.sh b/.loopx/apply-adr/apply-feedback.sh index c27088d..5910ec0 100755 --- a/.loopx/apply-adr/apply-feedback.sh +++ b/.loopx/apply-adr/apply-feedback.sh @@ -5,6 +5,7 @@ ROOT="$LOOPX_PROJECT_ROOT" SHARED_DIR="$ROOT/.loopx/shared" FEEDBACK_FILE="$SHARED_DIR/.feedback.tmp" CLAUDE_OUTPUT_FILE="$SHARED_DIR/.claude-output.tmp" +SESSION_FILE="$SHARED_DIR/.session.tmp" if [[ ! -f "$FEEDBACK_FILE" ]]; then echo "Error: No feedback file found at $FEEDBACK_FILE" >&2 @@ -13,21 +14,19 @@ fi FEEDBACK=$(cat "$FEEDBACK_FILE") -ADR_0002=$(cat "$ROOT/adr/0002-run-subcommand.md") -SPEC=$(cat "$ROOT/SPEC.md") +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" -PROMPT="ADR-0002 has been accepted and I am now updating SPEC.md to incorporate its changes (per the ADR process in ADR-0001). I received the following feedback on the current state of SPEC.md. Apply this feedback by updating SPEC.md only. ADR-0002 is the authoritative reference for what should change — do not modify it. If there is any ambiguity about my intentions, ask me clarifying questions. Think critically about this feedback and push back if warranted. After you finish, commit and push. - -adr/0002-run-subcommand.md (accepted — read-only reference): -$ADR_0002 - -Current SPEC.md: -$SPEC +PROMPT="ADR-$ADR_NUM ($ADR_REL) has been accepted and I am now updating SPEC.md to incorporate its changes (per the ADR process in adr/0001-adr-process.md). I received the following feedback on the current state of SPEC.md. Apply this feedback by updating SPEC.md only. ADR-$ADR_NUM is the authoritative reference for what should change — do not modify it. SPEC.md should describe the current specification only — no migration notes, breaking changes, or references to prior versions. If there is any ambiguity about my intentions, ask me clarifying questions. 
When you ask questions, ask only one at a time and wait for my answer before asking the next — do not batch multiple questions together. I have not read the feedback, I only pasted it in, so phrase each question to stand on its own: include the relevant context or quote from the feedback so I can answer without having to go read it. Think critically about this feedback and push back if warranted. After you finish, commit and push. Feedback: $FEEDBACK" -CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions -p 2>/dev/null) +SESSION_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') +echo "$SESSION_ID" > "$SESSION_FILE" + +CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions --session-id "$SESSION_ID" -p 2>/dev/null) rm -f "$FEEDBACK_FILE" echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" diff --git a/.loopx/apply-adr/index.sh b/.loopx/apply-adr/index.sh index d16117f..75ba538 100755 --- a/.loopx/apply-adr/index.sh +++ b/.loopx/apply-adr/index.sh @@ -3,7 +3,6 @@ set -euo pipefail ROOT="$LOOPX_PROJECT_ROOT" ADR_0001="$ROOT/adr/0001-adr-process.md" -ADR_0002="$ROOT/adr/0002-run-subcommand.md" SPEC="$ROOT/SPEC.md" SHARED_DIR="$ROOT/.loopx/shared" PROMPT_FILE="$SHARED_DIR/.prompt.tmp" @@ -19,23 +18,22 @@ if [[ ! -f "$ADR_0001" ]]; then exit 1 fi -if [[ ! -f "$ADR_0002" ]]; then - echo "Error: adr/0002-run-subcommand.md not found" >&2 - exit 1 -fi - if [[ ! -f "$SPEC" ]]; then echo "Error: SPEC.md not found" >&2 exit 1 fi +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" + echo "$LOOPX_WORKFLOW" > "$CALLER_FILE" cat < "$PROMPT_FILE" -ADR 0002 has been accepted. The next step in the process is to update SPEC.md to incorporate the changes described in ADR 0002. Review the current SPEC.md against ADR 0002 and let me know if the SPEC updates look correct and complete, or if anything else in the SPEC needs to be changed. +ADR $ADR_NUM has been accepted. 
The next step in the process is to update SPEC.md to incorporate the changes described in ADR $ADR_NUM. Review the current SPEC.md against ADR $ADR_NUM and let me know if the SPEC updates look correct and complete, or if anything else in the SPEC needs to be changed. -adr/0002-run-subcommand.md (accepted — do not modify): -$(cat "$ADR_0002") +$ADR_REL (accepted — do not modify): +$(cat "$ADR_FILE") SPEC.md (target of updates): $(cat "$SPEC") diff --git a/.loopx/ralph/check-ready.sh b/.loopx/ralph/check-ready.sh index 7d40193..9845fc9 100755 --- a/.loopx/ralph/check-ready.sh +++ b/.loopx/ralph/check-ready.sh @@ -19,10 +19,11 @@ echo "=== Readiness verdict: ${VERDICT} ===" >&2 if echo "$VERDICT" | grep -qw "READY"; then ITER=$(cat "$ITER_FILE" 2>/dev/null || echo "?") + JOB="$(basename "$ROOT") / ralph" curl -s -X POST "${TELEGRAM_API}/sendMessage" \ -d chat_id="$TELEGRAM_CHAT_ID" \ - --data-urlencode "text=Ralph loop: production ready after iteration ${ITER}. Halting." > /dev/null + --data-urlencode "text=[${JOB}] production ready after iteration ${ITER}. Halting." 
> /dev/null rm -f "$ITER_FILE" echo "=== Production ready — halting loop ===" >&2 diff --git a/.loopx/ralph/index.sh b/.loopx/ralph/index.sh index 80398a5..d253e32 100755 --- a/.loopx/ralph/index.sh +++ b/.loopx/ralph/index.sh @@ -22,9 +22,11 @@ else fi echo "$ITER" > "$ITER_FILE" +JOB="$(basename "$ROOT") / ralph" + curl -s -X POST "${TELEGRAM_API}/sendMessage" \ -d chat_id="$TELEGRAM_CHAT_ID" \ - --data-urlencode "text=Ralph loop: starting iteration ${ITER}" > /dev/null + --data-urlencode "text=[${JOB}] starting iteration ${ITER}" > /dev/null echo "=== Ralph iteration ${ITER} ===" >&2 diff --git a/.loopx/review-adr/apply-feedback.sh b/.loopx/review-adr/apply-feedback.sh index f13be69..ae292ee 100755 --- a/.loopx/review-adr/apply-feedback.sh +++ b/.loopx/review-adr/apply-feedback.sh @@ -5,6 +5,7 @@ ROOT="$LOOPX_PROJECT_ROOT" SHARED_DIR="$ROOT/.loopx/shared" FEEDBACK_FILE="$SHARED_DIR/.feedback.tmp" CLAUDE_OUTPUT_FILE="$SHARED_DIR/.claude-output.tmp" +SESSION_FILE="$SHARED_DIR/.session.tmp" if [[ ! -f "$FEEDBACK_FILE" ]]; then echo "Error: No feedback file found at $FEEDBACK_FILE" >&2 @@ -13,12 +14,19 @@ fi FEEDBACK=$(cat "$FEEDBACK_FILE") -PROMPT="I received the following feedback about ADR-0004 (run-scoped tmpdir and script args proposal) and its relationship to SPEC.md as defined by the the process laid out in ADR-0001. Incorporate this feedback to improve ADR-0004. If there is any ambiguity about my intentions, ask me clarifying questions. Think critically about this feedback and push back if warranted. Do not update any file other than ADR-0004 and do not mark it as accepted. After you finish, commit and push. +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" + +PROMPT="I received the following feedback about ADR-$ADR_NUM ($ADR_REL) and its relationship to SPEC.md as defined by the process laid out in ADR-0001. Incorporate this feedback to improve ADR-$ADR_NUM. 
Use your judgement on the best implementation details to achieve the product goals; if there is any ambiguity about my intentions, ask me clarifying questions from a product perspective rather than an implementation perspective. When you ask questions, ask only one at a time and wait for my answer before asking the next — do not batch multiple questions together. I have not read the feedback, I only pasted it in, so phrase each question to stand on its own: include the relevant context or quote from the feedback so I can answer without having to go read it. Think critically about this feedback and push back if warranted. Don't include migration details for existing users. Only include what is necessary to thoroughly update SPEC.md — keep additional information (such as testing considerations) to a minimum, because the primary purpose of this ADR is to be used to update SPEC.md. Do not update any file other than ADR-$ADR_NUM and do not mark it as accepted. After you finish, commit and push. Feedback: $FEEDBACK" -CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions -p 2>/dev/null) +SESSION_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') +echo "$SESSION_ID" > "$SESSION_FILE" + +CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions --session-id "$SESSION_ID" -p 2>/dev/null) rm -f "$FEEDBACK_FILE" echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" diff --git a/.loopx/review-adr/index.sh b/.loopx/review-adr/index.sh index 4f70e93..09473d4 100755 --- a/.loopx/review-adr/index.sh +++ b/.loopx/review-adr/index.sh @@ -3,7 +3,6 @@ set -euo pipefail ROOT="$LOOPX_PROJECT_ROOT" ADR_0001="$ROOT/adr/0001-adr-process.md" -ADR_0004="$ROOT/adr/0004-tmpdir-and-args.md" SPEC="$ROOT/SPEC.md" SHARED_DIR="$ROOT/.loopx/shared" PROMPT_FILE="$SHARED_DIR/.prompt.tmp" @@ -19,26 +18,27 @@ if [[ ! -f "$ADR_0001" ]]; then exit 1 fi -if [[ ! -f "$ADR_0004" ]]; then - echo "Error: adr/0004-tmpdir-and-args.md not found" >&2 - exit 1 -fi - if [[ ! 
-f "$SPEC" ]]; then echo "Error: SPEC.md not found" >&2 exit 1 fi +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" + echo "$LOOPX_WORKFLOW" > "$CALLER_FILE" cat < "$PROMPT_FILE" -Review ADR 0001, ADR 0004, and SPEC.md holistically and let me know if I can mark ADR 0004 as accepted or if I need to improve it further. Ask me clarifying questions if you have any doubts about my intentions for ADR 0004. - adr/0001-adr-process.md: $(cat "$ADR_0001") -adr/0004-tmpdir-and-args.md: -$(cat "$ADR_0004") +--- + +Review ADR $ADR_NUM and SPEC.md against the process described in adr/0001-adr-process.md above. Let me know if I can mark ADR $ADR_NUM as accepted or if I need to improve it further. Ask me clarifying questions if you have any doubts about my intentions for ADR $ADR_NUM. + +$ADR_REL: +$(cat "$ADR_FILE") SPEC.md: $(cat "$SPEC") diff --git a/.loopx/review-spec/apply-feedback.sh b/.loopx/review-spec/apply-feedback.sh new file mode 100755 index 0000000..02f3392 --- /dev/null +++ b/.loopx/review-spec/apply-feedback.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -euo pipefail + +ROOT="$LOOPX_PROJECT_ROOT" +SHARED_DIR="$ROOT/.loopx/shared" +FEEDBACK_FILE="$SHARED_DIR/.feedback.tmp" +CLAUDE_OUTPUT_FILE="$SHARED_DIR/.claude-output.tmp" +SESSION_FILE="$SHARED_DIR/.session.tmp" + +if [[ ! -f "$FEEDBACK_FILE" ]]; then + echo "Error: No feedback file found at $FEEDBACK_FILE" >&2 + exit 1 +fi + +FEEDBACK=$(cat "$FEEDBACK_FILE") + +PROMPT="I got this feedback from a review of @SPEC.md please incorporate this feedback. Use your judgement to make implementation decisions as long as they are aligned with my product vision. If there is any ambiguity about my intentions, ask me clarifying questions from a product perspective rather than an implementation perspective. When you ask questions, ask only one at a time and wait for my answer before asking the next — do not batch multiple questions together. 
I have not read the feedback, I only pasted it in, so phrase each question to stand on its own: include the relevant context or quote from the feedback so I can answer without having to go read it. Think critically and push back if needed. Only modify SPEC.md and after you are done, commit and push: + +$FEEDBACK" + +SESSION_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') +echo "$SESSION_ID" > "$SESSION_FILE" + +CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions --session-id "$SESSION_ID" -p 2>/dev/null) + +rm -f "$FEEDBACK_FILE" +echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" + +echo "" >&2 +echo "=== Claude finished applying feedback ===" >&2 + +$LOOPX_BIN output --goto "shared:check-question" diff --git a/.loopx/review-spec/index.sh b/.loopx/review-spec/index.sh new file mode 100755 index 0000000..e55a439 --- /dev/null +++ b/.loopx/review-spec/index.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -euo pipefail + +ROOT="$LOOPX_PROJECT_ROOT" +SPEC="$ROOT/SPEC.md" +SHARED_DIR="$ROOT/.loopx/shared" +PROMPT_FILE="$SHARED_DIR/.prompt.tmp" +CALLER_FILE="$SHARED_DIR/.caller.tmp" + +if [[ ! -d "$SHARED_DIR" ]]; then + echo "Error: shared workflow not found at $SHARED_DIR — install it with: loopx install -w shared modularcloud/sdg-workflows" >&2 + exit 1 +fi + +if [[ ! -f "$SPEC" ]]; then + echo "Error: SPEC.md not found at $SPEC — review-spec requires SPEC.md to exist at the project root" >&2 + exit 1 +fi + +echo "$LOOPX_WORKFLOW" > "$CALLER_FILE" + +cat < "$PROMPT_FILE" +Review my specification and let me know if this is implementation-ready or if you have feedback. To refine your feedback, feel free to ask questions. 
+ +SPEC.md: +$(cat "$SPEC") +PROMPT + +$LOOPX_BIN output --goto "shared:dispatch" diff --git a/.loopx/review-test-spec/apply-feedback.sh b/.loopx/review-test-spec/apply-feedback.sh new file mode 100755 index 0000000..060334b --- /dev/null +++ b/.loopx/review-test-spec/apply-feedback.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -euo pipefail + +ROOT="$LOOPX_PROJECT_ROOT" +SHARED_DIR="$ROOT/.loopx/shared" +FEEDBACK_FILE="$SHARED_DIR/.feedback.tmp" +CLAUDE_OUTPUT_FILE="$SHARED_DIR/.claude-output.tmp" +SESSION_FILE="$SHARED_DIR/.session.tmp" + +if [[ ! -f "$FEEDBACK_FILE" ]]; then + echo "Error: No feedback file found at $FEEDBACK_FILE" >&2 + exit 1 +fi + +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" + +FEEDBACK=$(cat "$FEEDBACK_FILE") + +PROMPT="I received the following feedback on TEST-SPEC.md with respect to SPEC.md. Apply this feedback by updating TEST-SPEC.md. SPEC.md is the authoritative source for what TEST-SPEC.md should cover. + +This review and any feedback you apply is scoped to ADR-$ADR_NUM. You are allowed to modify SPEC.md only when the feedback proposes a specific SPEC change that resolves a SPEC-PROBLEMS.md entry scoped to ADR-$ADR_NUM, and you MUST verify any proposed SPEC.md edits with me before applying them — show me the exact diff and wait for explicit approval. Do **not** apply SPEC.md changes that are unrelated to ADR-$ADR_NUM, even if the feedback proposes them — push back and explain that those changes are out of scope for this cycle. If you find a problem in the spec (an ambiguity, gap, or under-specified clause that prevents TEST-SPEC.md from covering the behavior cleanly) and that problem is scoped to ADR-$ADR_NUM, add it to SPEC-PROBLEMS.md so we can work to resolve it in a follow-up cycle; do not record problems unrelated to ADR-$ADR_NUM. If there are no remaining problems in the spec scoped to ADR-$ADR_NUM, delete SPEC-PROBLEMS.md. 
If there is any ambiguity about my intentions, ask me clarifying questions. When you ask questions, ask only one at a time and wait for my answer before asking the next — do not batch multiple questions together. I have not read the feedback, I only pasted it in, so phrase each question to stand on its own: include the relevant context or quote from the feedback so I can answer without having to go read it. Think critically about this feedback and push back if warranted. After you finish, commit and push. + +Feedback: +$FEEDBACK" + +SESSION_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') +echo "$SESSION_ID" > "$SESSION_FILE" + +CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions --session-id "$SESSION_ID" -p 2>/dev/null) + +rm -f "$FEEDBACK_FILE" +echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" + +echo "" >&2 +echo "=== Claude finished applying feedback ===" >&2 + +$LOOPX_BIN output --goto "shared:check-question" diff --git a/.loopx/review-test-spec/index.sh b/.loopx/review-test-spec/index.sh new file mode 100755 index 0000000..7c578bc --- /dev/null +++ b/.loopx/review-test-spec/index.sh @@ -0,0 +1,54 @@ +#!/bin/bash +set -euo pipefail + +ROOT="$LOOPX_PROJECT_ROOT" +SPEC="$ROOT/SPEC.md" +TEST_SPEC="$ROOT/TEST-SPEC.md" +SPEC_PROBLEMS="$ROOT/SPEC-PROBLEMS.md" +SHARED_DIR="$ROOT/.loopx/shared" +PROMPT_FILE="$SHARED_DIR/.prompt.tmp" +CALLER_FILE="$SHARED_DIR/.caller.tmp" + +if [[ ! -d "$SHARED_DIR" ]]; then + echo "Error: shared workflow not found at $SHARED_DIR — install it with: loopx install -w shared modularcloud/sdg-workflows" >&2 + exit 1 +fi + +if [[ ! -f "$SPEC" ]]; then + echo "Error: SPEC.md not found at $SPEC — review-test-spec requires SPEC.md to exist at the project root" >&2 + exit 1 +fi + +if [[ ! 
-f "$TEST_SPEC" ]]; then + echo "Error: TEST-SPEC.md not found at $TEST_SPEC — review-test-spec requires TEST-SPEC.md to exist at the project root" >&2 + exit 1 +fi + +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" + +echo "$LOOPX_WORKFLOW" > "$CALLER_FILE" + +if [[ -f "$SPEC_PROBLEMS" ]]; then + SPEC_PROBLEMS_BLOCK="$(printf 'SPEC-PROBLEMS.md (open SPEC ambiguities scoped to ADR-%s — please also work to resolve any problems listed here):\n%s\n\n' "$ADR_NUM" "$(cat "$SPEC_PROBLEMS")")" +else + SPEC_PROBLEMS_BLOCK="SPEC-PROBLEMS.md is absent — there are no currently tracked open SPEC ambiguities scoped to ADR-${ADR_NUM}."$'\n\n' +fi + +cat < "$PROMPT_FILE" +Review TEST-SPEC.md against SPEC.md and let me know whether TEST-SPEC.md covers the behavior described in SPEC.md correctly and completely, or what needs to be added, changed, or removed. + +This review is scoped to ADR-$ADR_NUM. SPEC.md changes are permitted **only** when they directly resolve a SPEC-PROBLEMS.md entry that is itself scoped to ADR-$ADR_NUM. Do **not** propose SPEC.md edits unrelated to ADR-$ADR_NUM, even if you notice gaps, ambiguities, or improvements in other sections of SPEC.md — those are out of scope for this cycle. Also work to resolve any problems in SPEC-PROBLEMS.md (if present) — call out which entries appear addressable in this cycle and which still require a SPEC change. If SPEC-PROBLEMS.md exists, you may propose specific SPEC.md edits that would resolve the listed ambiguities, but only when those edits are scoped to ADR-$ADR_NUM. If you find a SPEC ambiguity, gap, or under-specified clause that prevents TEST-SPEC.md from covering ADR-$ADR_NUM behavior cleanly, recommend adding it to SPEC-PROBLEMS.md; do not record problems unrelated to ADR-$ADR_NUM. 
+ +$ADR_REL (scope of this review — SPEC and SPEC-PROBLEMS changes must be related to this ADR): +$(cat "$ADR_FILE") + +SPEC.md (authoritative reference; SPEC.md edits are permitted only to resolve SPEC-PROBLEMS.md entries scoped to ADR-$ADR_NUM): +$(cat "$SPEC") + +${SPEC_PROBLEMS_BLOCK}TEST-SPEC.md (target of updates): +$(cat "$TEST_SPEC") +PROMPT + +$LOOPX_BIN output --goto "shared:dispatch" diff --git a/.loopx/shared/apply-answer.sh b/.loopx/shared/apply-answer.sh index d406614..837b17c 100755 --- a/.loopx/shared/apply-answer.sh +++ b/.loopx/shared/apply-answer.sh @@ -4,16 +4,33 @@ set -euo pipefail ROOT="$LOOPX_PROJECT_ROOT" CLAUDE_OUTPUT_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.claude-output.tmp" ANSWER_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.answer.tmp" +SESSION_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.session.tmp" +CALLER_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.caller.tmp" if [[ ! -f "$ANSWER_FILE" ]]; then echo "Error: No answer file found at $ANSWER_FILE" >&2 exit 1 fi +if [[ ! -f "$SESSION_FILE" ]]; then + echo "Error: No session file found at $SESSION_FILE — apply-feedback must run first" >&2 + exit 1 +fi + +if [[ ! -f "$CALLER_FILE" ]]; then + echo "Error: No caller file found at $CALLER_FILE" >&2 + exit 1 +fi + ANSWER=$(cat "$ANSWER_FILE") +SESSION_ID=$(cat "$SESSION_FILE") +CALLER=$(cat "$CALLER_FILE") +CALLER_DIR="$ROOT/.loopx/$CALLER" -# Continue the existing conversation started by apply-feedback -CLAUDE_OUTPUT=$(echo "$ANSWER" | claude --dangerously-skip-permissions -c -p 2>/dev/null) +# Resume the session started by the caller's apply-feedback. Claude Code +# stores session files per-cwd, so we must cd into the caller's workflow +# directory before --resume can locate the session. 
+CLAUDE_OUTPUT=$(cd "$CALLER_DIR" && echo "$ANSWER" | claude --dangerously-skip-permissions --resume "$SESSION_ID" -p 2>/dev/null) echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" rm -f "$ANSWER_FILE" diff --git a/.loopx/shared/check-feedback-done.sh b/.loopx/shared/check-feedback-done.sh index 8d0f05a..f70e623 100755 --- a/.loopx/shared/check-feedback-done.sh +++ b/.loopx/shared/check-feedback-done.sh @@ -1,6 +1,10 @@ #!/bin/bash set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./telegram-lib.sh +source "$SCRIPT_DIR/telegram-lib.sh" + ROOT="$LOOPX_PROJECT_ROOT" FEEDBACK_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.feedback.tmp" SCHEMA="$ROOT/.loopx/$LOOPX_WORKFLOW/check-feedback-done.schema.json" @@ -25,7 +29,7 @@ CALLER=$(cat "$CALLER_FILE") FEEDBACK=$(cat "$FEEDBACK_FILE") -VERDICT=$(codex exec --output-schema "$SCHEMA" "I received this feedback for my specs. Is it requiring that I continue making improvements before calling this stage of feedback done? Ignore optional feedback. Return done=true only if the response says we can be done explicitly and/or there are no further non-optional pieces of feedback. Note: if it says 'make this important change and then you are done', that does NOT count as done — return done=false. +VERDICT=$(codex exec --sandbox read-only --output-schema "$SCHEMA" "I received this feedback for my specs. Is it requiring that I continue making improvements before calling this stage of feedback done? Ignore optional feedback. Return done=true only if the response says we can be done explicitly and/or there are no further non-optional pieces of feedback. Note: if it says 'make this important change and then you are done', that does NOT count as done — return done=false. 
Feedback: $FEEDBACK" 2>/dev/null) @@ -34,7 +38,13 @@ DONE=$(echo "$VERDICT" | jq -r '.done') if [[ "$DONE" == "true" ]]; then echo "=== Feedback indicates no further non-optional improvements — halting ===" >&2 - rm -f "$FEEDBACK_FILE" "$CALLER_FILE" + + ALERT_LABEL=$(tg_alert_label) + curl -s -X POST "${TELEGRAM_API}/sendMessage" \ + -d chat_id="$TELEGRAM_CHAT_ID" \ + --data-urlencode "text=[${ALERT_LABEL}] Feedback indicates no further non-optional improvements. Halting." > /dev/null + + rm -f "$FEEDBACK_FILE" "$CALLER_FILE" "$ROOT/.loopx/$LOOPX_WORKFLOW/.session.tmp" $LOOPX_BIN output --result "Feedback indicates no further non-optional improvements. Halting." --stop else echo "=== Feedback requires further improvements — applying ===" >&2 diff --git a/.loopx/shared/check-question.schema.json b/.loopx/shared/check-question.schema.json index fe39a58..ac9f0ee 100644 --- a/.loopx/shared/check-question.schema.json +++ b/.loopx/shared/check-question.schema.json @@ -2,7 +2,7 @@ "type": "object", "properties": { "has_question": { - "description": "true if the text contains a question or request for clarification directed at the user, false otherwise", + "description": "true only if the text contains an actual pending question that blocks completion of the work (the agent needs an answer before proceeding); false for sign-offs that report work as complete and invite post-hoc corrections", "type": "boolean" } }, diff --git a/.loopx/shared/check-question.sh b/.loopx/shared/check-question.sh index 7fa51e0..53c1c2f 100755 --- a/.loopx/shared/check-question.sh +++ b/.loopx/shared/check-question.sh @@ -1,105 +1,180 @@ #!/bin/bash set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./telegram-lib.sh +source "$SCRIPT_DIR/telegram-lib.sh" + ROOT="$LOOPX_PROJECT_ROOT" CLAUDE_OUTPUT_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.claude-output.tmp" ANSWER_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.answer.tmp" -: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN 
env var is required}" -: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID env var is required}" - -TELEGRAM_API="https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}" - if [[ ! -f "$CLAUDE_OUTPUT_FILE" ]]; then echo "Error: No Claude output file found at $CLAUDE_OUTPUT_FILE" >&2 exit 1 fi CLAUDE_OUTPUT=$(cat "$CLAUDE_OUTPUT_FILE") +CLAUDE_OUTPUT_HTML=$(printf '%s' "$CLAUDE_OUTPUT" | node "$SCRIPT_DIR/md-to-tg-html.mjs") # Use Codex with output schema to deterministically classify the output SCHEMA="$ROOT/.loopx/$LOOPX_WORKFLOW/check-question.schema.json" -VERDICT=$(codex exec --output-schema "$SCHEMA" "Does the following text contain a question or request for clarification directed at the user? +VERDICT=$(codex exec --sandbox read-only --output-schema "$SCHEMA" "Does the following text contain an actual question that blocks completion of the work — i.e., the agent cannot continue or finish until the user answers? + +Do NOT count these as blocking questions: +- Sign-offs that report work as complete and invite post-hoc corrections (e.g., \"I finished X, let me know if anything is wrong\" or \"Tell me if I got something wrong\"). Correcting wrong assumptions is not a priority — treat these as has_question=false. +- General invitations for follow-up feedback after the work is already done. + +Return has_question=true only when the text is paused on a pending question the agent needs an answer to before proceeding. When in doubt, prefer has_question=false. 
$CLAUDE_OUTPUT" 2>/dev/null) HAS_QUESTION=$(echo "$VERDICT" | jq -r '.has_question') +TOPIC_NAME=$(tg_topic_name) +ALERT_LABEL=$(tg_alert_label) + if [[ "$HAS_QUESTION" == "true" ]]; then - echo "=== Claude has a question — sending to Telegram ===" >&2 - - # Send as document if too long for a message, otherwise as text - if [[ ${#CLAUDE_OUTPUT} -gt 4000 ]]; then - QUESTION_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.question.tmp" - echo "$CLAUDE_OUTPUT" > "$QUESTION_FILE" - curl -s -X POST "${TELEGRAM_API}/sendDocument" \ - -F chat_id="$TELEGRAM_CHAT_ID" \ - -F document=@"$QUESTION_FILE;filename=question.md" \ - -F caption="Claude has a question — reply with your answer" > /dev/null - rm -f "$QUESTION_FILE" - else - curl -s -X POST "${TELEGRAM_API}/sendMessage" \ - -d chat_id="$TELEGRAM_CHAT_ID" \ - --data-urlencode "text=${CLAUDE_OUTPUT}" > /dev/null + echo "=== Claude has a question — sending to Telegram topic '$TOPIC_NAME' ===" >&2 + + THREAD_ID=$(tg_resolve_topic_id "$TOPIC_NAME") + + # Watermark BEFORE sending so a near-instant reply isn't filtered out. Use + # negative offset to peek the tail without advancing the global confirmation + # pointer — confirming would delete updates queued for parallel runs. + SENTINEL=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-1&limit=1" | jq -r '.result[-1].update_id // 0') + + # Telegram caps sendMessage text at 4096 chars; chunk at 4000 to leave headroom + # for UTF-16 surrogate pairs (${#var} counts code points, not code units). 
+ send_question() { + local thread="$1" + local max=4000 + local remaining="$CLAUDE_OUTPUT_HTML" + local response="" + local first=1 + while (( ${#remaining} > 0 )); do + local chunk + if (( ${#remaining} <= max )); then + chunk="$remaining" + remaining="" + else + local window="${remaining:0:$max}" + local trimmed="${window%$'\n'*}" + if [[ "$trimmed" != "$window" && ${#trimmed} -gt $((max / 2)) ]]; then + chunk="${trimmed}"$'\n' + else + chunk="$window" + fi + remaining="${remaining:${#chunk}}" + fi + response=$(curl -s -X POST "${TELEGRAM_API}/sendMessage" \ + -d chat_id="$TELEGRAM_CHAT_ID" \ + -d message_thread_id="$thread" \ + -d parse_mode=HTML \ + --data-urlencode "text=${chunk}") + # Chunking can split inside an HTML tag; on a parse error fall back to + # plain text for that chunk so the message still lands. + if [[ "$(echo "$response" | jq -r '.ok')" != "true" ]]; then + local desc + desc=$(echo "$response" | jq -r '.description // ""') + if [[ "$desc" == *"can't parse entities"* || "$desc" == *"Can't parse entities"* ]]; then + response=$(curl -s -X POST "${TELEGRAM_API}/sendMessage" \ + -d chat_id="$TELEGRAM_CHAT_ID" \ + -d message_thread_id="$thread" \ + --data-urlencode "text=${chunk}") + fi + fi + if [[ "$(echo "$response" | jq -r '.ok')" != "true" ]]; then + # First-chunk failure bubbles up so the caller can retry on stale thread. + # Later-chunk failures can't be undone, so warn and keep going. + if (( first )); then + echo "$response" + return + fi + echo "Warning: failed to send follow-up chunk: $response" >&2 + fi + first=0 + done + echo "$response" + } + + SEND_RESPONSE=$(send_question "$THREAD_ID") + if [[ "$(echo "$SEND_RESPONSE" | jq -r '.ok')" != "true" ]]; then + DESC=$(echo "$SEND_RESPONSE" | jq -r '.description // ""') + if tg_is_stale_thread_error "$DESC"; then + echo "Warning: cached topic $THREAD_ID no longer usable — recreating..." 
>&2 + tg_forget_topic "$TOPIC_NAME" + THREAD_ID=$(tg_resolve_topic_id "$TOPIC_NAME") + SEND_RESPONSE=$(send_question "$THREAD_ID") + fi fi - - echo "Waiting for answer..." >&2 - - # Flush old updates to get current offset - FLUSH_RESPONSE=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-1") - LAST_UPDATE_ID=$(echo "$FLUSH_RESPONSE" | jq -r '.result[-1].update_id // empty') - if [[ -n "$LAST_UPDATE_ID" ]]; then - OFFSET=$((LAST_UPDATE_ID + 1)) - else - OFFSET=0 + if [[ "$(echo "$SEND_RESPONSE" | jq -r '.ok')" != "true" ]]; then + echo "Error: Failed to send question to Telegram: $SEND_RESPONSE" >&2 + exit 1 fi - # Long-poll for a reply, collecting split messages over a 10s window + echo "Waiting for answer in topic '$TOPIC_NAME' (thread $THREAD_ID)..." >&2 + COLLECTED="" DEADLINE="" while true; do - if [[ -n "$DEADLINE" ]]; then - NOW=$(date +%s) - if [[ $NOW -ge $DEADLINE ]]; then - break - fi - POLL_TIMEOUT=2 - else - POLL_TIMEOUT=30 + # Negative offset + sentinel: read the tail without advancing the global + # confirmation pointer, and filter per-run by thread_id so parallel review + # cycles don't steal each other's answers. 
+ UPDATES=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-100&limit=100") + if [[ "$(echo "$UPDATES" | jq -r '.ok // false')" != "true" ]]; then + sleep 2 + continue fi - UPDATES=$(curl -s "${TELEGRAM_API}/getUpdates?offset=${OFFSET}&timeout=${POLL_TIMEOUT}") - - MSG_COUNT=$(echo "$UPDATES" | jq --arg cid "$TELEGRAM_CHAT_ID" ' - [.result[] | select(.message.chat.id == ($cid | tonumber) and .message.text != null)] | length + BATCH=$(echo "$UPDATES" | jq -r \ + --arg cid "$TELEGRAM_CHAT_ID" \ + --argjson thread "$THREAD_ID" \ + --argjson sentinel "$SENTINEL" ' + [.result[] + | select(.message.chat.id == ($cid|tonumber) + and .message.text != null + and (.message.message_thread_id // 0) == $thread + and .update_id > $sentinel)] + | map(.message.text) | join("\n") + ') + MAX_ID=$(echo "$UPDATES" | jq -r \ + --arg cid "$TELEGRAM_CHAT_ID" \ + --argjson thread "$THREAD_ID" \ + --argjson sentinel "$SENTINEL" ' + [.result[] + | select(.message.chat.id == ($cid|tonumber) + and .message.text != null + and (.message.message_thread_id // 0) == $thread + and .update_id > $sentinel) + | .update_id] + | max // 0 ') - if [[ "$MSG_COUNT" -gt 0 ]]; then - NEW_TEXTS=$(echo "$UPDATES" | jq -r --arg cid "$TELEGRAM_CHAT_ID" ' - [.result[] | select(.message.chat.id == ($cid | tonumber) and .message.text != null)] - | .[].message.text - ') + if [[ -n "$BATCH" ]]; then if [[ -n "$COLLECTED" ]]; then COLLECTED="${COLLECTED} -${NEW_TEXTS}" +${BATCH}" else - COLLECTED="$NEW_TEXTS" + COLLECTED="$BATCH" + fi + if (( MAX_ID > SENTINEL )); then + SENTINEL=$MAX_ID fi - if [[ -z "$DEADLINE" ]]; then DEADLINE=$(( $(date +%s) + 10 )) echo "=== First message received, collecting for 10s... 
===" >&2 fi fi - NEW_LAST=$(echo "$UPDATES" | jq -r '.result[-1].update_id // empty') - if [[ -n "$NEW_LAST" ]]; then - OFFSET=$((NEW_LAST + 1)) + if [[ -n "$DEADLINE" ]]; then + NOW=$(date +%s) + (( NOW >= DEADLINE )) && break fi - done - curl -s "${TELEGRAM_API}/getUpdates?offset=${OFFSET}" > /dev/null + sleep 2 + done echo "$COLLECTED" > "$ANSWER_FILE" echo "=== Answer received from Telegram ===" >&2 @@ -110,12 +185,13 @@ ${NEW_TEXTS}" $LOOPX_BIN output --goto "apply-answer" exit 0 else - # No question — notify and loop back to copy-prompt + # Terminal alert — route to General with the workflow label so it's clear + # which run is speaking. Per-run topic stays focused on the review dialog. curl -s -X POST "${TELEGRAM_API}/sendMessage" \ -d chat_id="$TELEGRAM_CHAT_ID" \ - -d text="Feedback applied. Ready for next review cycle." > /dev/null + --data-urlencode "text=[${ALERT_LABEL}] Feedback applied. Ready for next review cycle." > /dev/null - rm -f "$CLAUDE_OUTPUT_FILE" "$ROOT/.loopx/$LOOPX_WORKFLOW/.caller.tmp" + rm -f "$CLAUDE_OUTPUT_FILE" "$ROOT/.loopx/$LOOPX_WORKFLOW/.caller.tmp" "$ROOT/.loopx/$LOOPX_WORKFLOW/.session.tmp" echo "=== No questions. Ready for next review cycle. ===" >&2 $LOOPX_BIN output --result "Feedback applied. Ready for next review cycle." fi diff --git a/.loopx/shared/md-to-tg-html.mjs b/.loopx/shared/md-to-tg-html.mjs new file mode 100644 index 0000000..593de40 --- /dev/null +++ b/.loopx/shared/md-to-tg-html.mjs @@ -0,0 +1,74 @@ +#!/usr/bin/env node +// Markdown → Telegram-HTML converter for Claude Code output. +// Not a full CommonMark parser; covers the subset Claude typically emits. +// Telegram-supported tags: b, i, u, s, code, pre, a, blockquote, tg-spoiler. +// Headings and lists aren't supported natively, so # → bold and - → •. 
+ +import { readFileSync } from "node:fs"; + +const input = readFileSync(0, "utf8"); + +const placeholders = []; +const ph = (s) => { + const token = `\x00${placeholders.length}\x00`; + placeholders.push(s); + return token; +}; + +const escHtml = (s) => + s.replace(/&/g, "&").replace(//g, ">"); + +const escAttr = (s) => + s.replace(/&/g, "&").replace(/"/g, """); + +let md = input; + +// Extract fenced code blocks first so inner content is not touched by other rules. +md = md.replace(/```([a-zA-Z0-9_+-]*)\r?\n([\s\S]*?)```/g, (_m, lang, code) => { + const body = escHtml(code.replace(/\r?\n$/, "")); + const langAttr = lang ? ` class="language-${lang}"` : ""; + return ph(`
${body}
`); +}); + +// Inline code. +md = md.replace(/`([^`\n]+)`/g, (_m, code) => ph(`${escHtml(code)}`)); + +// Escape the remaining document; from here on we emit tags directly. +md = escHtml(md); + +// Links [text](url). +md = md.replace( + /\[([^\]\n]+)\]\(([^)\s]+)\)/g, + (_m, text, url) => `${text}`, +); + +// Bold: **text** or __text__. +md = md.replace(/\*\*([^*\n]+?)\*\*/g, "$1"); +md = md.replace(/(^|[^\w])__([^_\n]+?)__(?!\w)/g, "$1$2"); + +// Italic: *text* / _text_ (avoid list markers and intra-word underscores). +md = md.replace(/(^|[^*\w])\*([^*\n]+?)\*(?!\*)/g, "$1$2"); +md = md.replace(/(^|[^_\w])_([^_\n]+?)_(?!\w)/g, "$1$2"); + +// Headings → bold (Telegram has no heading tag). +md = md.replace(/^#{1,6}\s+(.+?)\s*#*$/gm, "$1"); + +// Bullet list markers → •. +md = md.replace(/^(\s*)[-*+]\s+/gm, "$1• "); + +// Horizontal rules. +md = md.replace(/^\s*(?:-{3,}|\*{3,}|_{3,})\s*$/gm, "────────"); + +// Blockquotes: wrap consecutive `> ` lines in a single
. +// (Escape pass has turned `>` into `>`, so that's what we match.) +md = md.replace(/(?:^>\s?.*(?:\n|$))+/gm, (block) => { + const body = block + .replace(/^>\s?/gm, "") + .replace(/\n$/, ""); + return `
${body}
\n`; +}); + +// Restore extracted code spans. +md = md.replace(/\x00(\d+)\x00/g, (_m, i) => placeholders[Number(i)]); + +process.stdout.write(md); diff --git a/.loopx/shared/resolve-adr.sh b/.loopx/shared/resolve-adr.sh new file mode 100755 index 0000000..7756870 --- /dev/null +++ b/.loopx/shared/resolve-adr.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Resolves the ADR env var (e.g. ADR=4 or ADR=0004) to a full adr/NNNN-*.md path. +# Outputs: "\t" on stdout. +# Errors to stderr and exits 1 if ADR is unset, non-numeric, missing, or ambiguous. +set -euo pipefail + +: "${LOOPX_PROJECT_ROOT:?LOOPX_PROJECT_ROOT must be set}" + +if [[ -z "${ADR:-}" ]]; then + echo "Error: ADR env var is required (e.g. ADR=4 or ADR=0004)" >&2 + exit 1 +fi + +if ! [[ "$ADR" =~ ^[0-9]+$ ]]; then + echo "Error: ADR must be numeric (got '$ADR')" >&2 + exit 1 +fi + +PADDED=$(printf '%04d' "$((10#$ADR))") + +shopt -s nullglob +MATCHES=("$LOOPX_PROJECT_ROOT"/adr/"$PADDED"-*.md) +shopt -u nullglob + +if [[ ${#MATCHES[@]} -eq 0 ]]; then + echo "Error: no ADR file found at adr/${PADDED}-*.md under $LOOPX_PROJECT_ROOT" >&2 + exit 1 +fi + +if [[ ${#MATCHES[@]} -gt 1 ]]; then + echo "Error: multiple ADR files match adr/${PADDED}-*.md:" >&2 + printf ' %s\n' "${MATCHES[@]}" >&2 + exit 1 +fi + +printf '%s\t%s\n' "$PADDED" "${MATCHES[0]}" diff --git a/.loopx/shared/send-api.ts b/.loopx/shared/send-api.ts index 086ff22..882c33a 100644 --- a/.loopx/shared/send-api.ts +++ b/.loopx/shared/send-api.ts @@ -9,6 +9,7 @@ const THINKING = (process.env.GPT_PRO_THINKING ?? 
"medium") as | "medium" | "high" | "xhigh"; +const FLEX = process.env.OPENAI_FLEX === "true"; if (!process.env.OPENAI_API_KEY) { console.error( @@ -30,7 +31,7 @@ const prompt = readFileSync(PROMPT_FILE, "utf8"); if (existsSync(FEEDBACK_FILE)) rmSync(FEEDBACK_FILE); -const client = new OpenAI(); +const client = new OpenAI({ timeout: 60_000, maxRetries: 4 }); // Resume a prior background response if one was persisted from an earlier run // that died before the response reached a terminal state. Background responses @@ -54,26 +55,66 @@ if (existsSync(RESPONSE_ID_FILE)) { } if (!response) { - console.error(`requesting gpt-5.4-pro (thinking=${THINKING})...`); + console.error( + `requesting gpt-5.5-pro (thinking=${THINKING}${FLEX ? ", flex" : ""})...`, + ); response = await client.responses.create({ - model: "gpt-5.4-pro", + model: "gpt-5.5-pro", reasoning: { effort: THINKING }, input: prompt, background: true, + prompt_cache_key: WORKFLOW, + prompt_cache_retention: "24h", + ...(FLEX ? { service_tier: "flex" } : {}), }); writeFileSync(RESPONSE_ID_FILE, response.id); console.error(`submitted background response ${response.id}`); } const TERMINAL = new Set(["completed", "failed", "cancelled", "incomplete"]); +// Each failed retrieve has already been through the SDK's 4 retries, so 20 +// consecutive failures represents tens of minutes of sustained OpenAI +// unavailability — wider than any single inference window. +const MAX_POLL_FAILURES = 20; +// Inference can take up to ~30 min at xhigh thinking; 1 h bounds worst-case +// hangs where a response is accepted but never transitions to terminal. +// Measured from response.created_at so resumes don't reset the clock. +const DEADLINE_SECS = 60 * 60; let lastStatus: string | undefined; +let pollFailures = 0; while (!TERMINAL.has(response.status ?? 
"")) { + const ageSecs = Math.floor(Date.now() / 1000) - response.created_at; + if (ageSecs > DEADLINE_SECS) { + console.error( + `${response.id} exceeded ${DEADLINE_SECS / 60}m deadline (age ${ageSecs}s, status ${response.status}); cancelling`, + ); + try { + await client.responses.cancel(response.id); + } catch (err: any) { + const status = err?.status ?? err?.code ?? "error"; + console.error(`cancel ${response.id} failed (${status})`); + } + rmSync(RESPONSE_ID_FILE, { force: true }); + throw new Error( + `gpt-5.5-pro response ${response.id} did not complete within ${DEADLINE_SECS / 60} minutes (last status: ${response.status ?? "unknown"})`, + ); + } if (response.status !== lastStatus) { console.error(`waiting for ${response.id} (${response.status})...`); lastStatus = response.status ?? undefined; } await new Promise((r) => setTimeout(r, 2000)); - response = await client.responses.retrieve(response.id); + try { + response = await client.responses.retrieve(response.id); + pollFailures = 0; + } catch (err: any) { + pollFailures++; + const status = err?.status ?? err?.code ?? "error"; + console.error( + `poll ${response.id} failed (${status}); retry ${pollFailures}/${MAX_POLL_FAILURES}`, + ); + if (pollFailures >= MAX_POLL_FAILURES) throw err; + } } if (response.status !== "completed") { @@ -84,7 +125,16 @@ if (response.status !== "completed") { ? `: ${response.error.message}` : ""; throw new Error( - `gpt-5.4-pro response ${response.id} ended in status ${response.status}${detail}`, + `gpt-5.5-pro response ${response.id} ended in status ${response.status}${detail}`, + ); +} + +const cachedTokens = response.usage?.input_tokens_details?.cached_tokens ?? 0; +const inputTokens = response.usage?.input_tokens ?? 0; +if (inputTokens > 0) { + const pct = Math.round((100 * cachedTokens) / inputTokens); + console.error( + `tokens: input=${inputTokens} cached=${cachedTokens} (${pct}%) output=${response.usage?.output_tokens ?? 
0}`, ); } @@ -98,13 +148,16 @@ const answer = .join("\n"); if (!answer) { - throw new Error("gpt-5.4-pro returned no output text"); + throw new Error("gpt-5.5-pro returned no output text"); } writeFileSync(FEEDBACK_FILE, answer); -rmSync(PROMPT_FILE); -if (existsSync(RESPONSE_ID_FILE)) rmSync(RESPONSE_ID_FILE); -console.error("=== Feedback received from GPT-5.4-Pro ==="); +// force:true — external cleanup (reinstall, git clean, sibling workflow) can +// remove these scratch files mid-run; crashing here would skip the goto to +// check-feedback-done and strand the loop. +rmSync(PROMPT_FILE, { force: true }); +rmSync(RESPONSE_ID_FILE, { force: true }); +console.error("=== Feedback received from GPT-5.5-Pro ==="); execFileSync(BIN, ["output", "--goto", "check-feedback-done"], { stdio: "inherit", diff --git a/.loopx/shared/send-batch.ts b/.loopx/shared/send-batch.ts index 11cd783..bdf40c8 100644 --- a/.loopx/shared/send-batch.ts +++ b/.loopx/shared/send-batch.ts @@ -30,27 +30,32 @@ const prompt = readFileSync(PROMPT_FILE, "utf8"); if (existsSync(FEEDBACK_FILE)) rmSync(FEEDBACK_FILE); -const client = new OpenAI(); +const client = new OpenAI({ timeout: 60_000, maxRetries: 4 }); const batchLine = JSON.stringify({ - custom_id: `gpt54-${randomUUID()}`, + custom_id: `gpt55-${randomUUID()}`, method: "POST", url: "/v1/responses", body: { - model: "gpt-5.4-pro", + model: "gpt-5.5-pro", reasoning: { effort: THINKING }, input: prompt, + prompt_cache_key: WORKFLOW, + prompt_cache_retention: "24h", }, }); -const inputFile = await client.files.create({ - file: await toFile( - Buffer.from(batchLine + "\n", "utf8"), - `batch-${Date.now()}.jsonl`, - { type: "application/x-ndjson" }, - ), - purpose: "batch", -}); +const inputFile = await client.files.create( + { + file: await toFile( + Buffer.from(batchLine + "\n", "utf8"), + `batch-${Date.now()}.jsonl`, + { type: "application/x-ndjson" }, + ), + purpose: "batch", + }, + { timeout: 300_000 }, +); const batch = await 
client.batches.create({ input_file_id: inputFile.id, @@ -62,13 +67,29 @@ console.error(`submitted batch: ${batch.id}`); let b = batch; let lastStatus: string | undefined; +// Each failed retrieve has already been through the SDK's 4 retries, so 20 +// consecutive failures represents tens of minutes of sustained OpenAI +// unavailability. Crashing would forfeit the batch and double-submit next run. +const MAX_POLL_FAILURES = 20; +let pollFailures = 0; while ( !b.output_file_id && !b.error_file_id && !["failed", "expired", "cancelled"].includes(b.status) ) { await new Promise((r) => setTimeout(r, 2000)); - b = await client.batches.retrieve(batch.id); + try { + b = await client.batches.retrieve(batch.id); + pollFailures = 0; + } catch (err: any) { + pollFailures++; + const status = err?.status ?? err?.code ?? "error"; + console.error( + `poll batch ${batch.id} failed (${status}); retry ${pollFailures}/${MAX_POLL_FAILURES}`, + ); + if (pollFailures >= MAX_POLL_FAILURES) throw err; + continue; + } if (b.status !== lastStatus) { console.error(`waiting for batch ${b.id} (${b.status})...`); lastStatus = b.status; @@ -76,14 +97,18 @@ while ( } if (b.error_file_id) { - const err = await (await client.files.content(b.error_file_id)).text(); + const err = await ( + await client.files.content(b.error_file_id, { timeout: 300_000 }) + ).text(); throw new Error(`Batch error: ${err}`); } if (!b.output_file_id) { throw new Error(`Batch ${batch.id} ended in status ${b.status}`); } -const outText = await (await client.files.content(b.output_file_id)).text(); +const outText = await ( + await client.files.content(b.output_file_id, { timeout: 300_000 }) +).text(); const line = JSON.parse(outText.trim().split(/\r?\n/)[0]); if (line.error) throw new Error(`Batch error: ${line.error.message}`); @@ -98,8 +123,11 @@ const answer = .join("\n"); writeFileSync(FEEDBACK_FILE, answer); -rmSync(PROMPT_FILE); -console.error("=== Feedback received from GPT-5.4-Pro ==="); +// force:true — external 
cleanup (reinstall, git clean, sibling workflow) can +// remove the prompt file mid-run; crashing here would skip the goto to +// check-feedback-done and strand the loop. +rmSync(PROMPT_FILE, { force: true }); +console.error("=== Feedback received from GPT-5.5-Pro ==="); execFileSync(BIN, ["output", "--goto", "check-feedback-done"], { stdio: "inherit", diff --git a/.loopx/shared/send-codex.sh b/.loopx/shared/send-codex.sh index 5fe3a97..e2268b2 100755 --- a/.loopx/shared/send-codex.sh +++ b/.loopx/shared/send-codex.sh @@ -18,22 +18,34 @@ fi echo "" >&2 echo "=== Invoking codex CLI ===" >&2 -rm -f "$FEEDBACK_FILE" +STDERR_LOG="$ROOT/.loopx/$LOOPX_WORKFLOW/.codex-stderr.tmp" +rm -f "$FEEDBACK_FILE" "$STDERR_LOG" +set +e codex exec - \ --skip-git-repo-check \ --sandbox read-only \ --color never \ --output-last-message "$FEEDBACK_FILE" \ - < "$PROMPT_FILE" >/dev/null + < "$PROMPT_FILE" >/dev/null 2>"$STDERR_LOG" +CODEX_STATUS=$? +set -e rm -f "$PROMPT_FILE" -if [[ ! -s "$FEEDBACK_FILE" ]]; then - echo "Error: codex produced no feedback" >&2 +if [[ $CODEX_STATUS -ne 0 || ! 
-s "$FEEDBACK_FILE" ]]; then + echo "Error: codex exec failed (exit=$CODEX_STATUS) or produced no feedback" >&2 + if [[ -s "$STDERR_LOG" ]]; then + echo "--- codex stderr ---" >&2 + cat "$STDERR_LOG" >&2 + echo "--- end codex stderr ---" >&2 + fi + rm -f "$STDERR_LOG" exit 1 fi +rm -f "$STDERR_LOG" + echo "=== Feedback received from codex ===" >&2 echo "--- Begin feedback ---" >&2 cat "$FEEDBACK_FILE" >&2 diff --git a/.loopx/shared/send-telegram.sh b/.loopx/shared/send-telegram.sh index 822cba5..6497c1f 100755 --- a/.loopx/shared/send-telegram.sh +++ b/.loopx/shared/send-telegram.sh @@ -1,99 +1,123 @@ #!/bin/bash set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=./telegram-lib.sh +source "$SCRIPT_DIR/telegram-lib.sh" + ROOT="$LOOPX_PROJECT_ROOT" PROMPT_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.prompt.tmp" FEEDBACK_FILE="$ROOT/.loopx/$LOOPX_WORKFLOW/.feedback.tmp" -: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN env var is required}" -: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID env var is required}" - if [[ ! -s "$PROMPT_FILE" ]]; then echo "Error: prompt file not found at $PROMPT_FILE" >&2 exit 1 fi -TELEGRAM_API="https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}" - -# Flush old updates to get current offset -FLUSH_RESPONSE=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-1") -LAST_UPDATE_ID=$(echo "$FLUSH_RESPONSE" | jq -r '.result[-1].update_id // empty') -if [[ -n "$LAST_UPDATE_ID" ]]; then - OFFSET=$((LAST_UPDATE_ID + 1)) -else - OFFSET=0 +TOPIC_NAME=$(tg_topic_name) + +send_prompt() { + curl -s -X POST "${TELEGRAM_API}/sendDocument" \ + -F chat_id="$TELEGRAM_CHAT_ID" \ + -F message_thread_id="$1" \ + -F document=@"$PROMPT_FILE;filename=review-prompt.md" \ + -F caption="Review prompt — reply with your feedback" +} + +# Watermark: highest update_id currently in the queue. Set BEFORE sending so a +# near-instant reply (update_id arriving during the sendDocument roundtrip) is +# still newer than SENTINEL and isn't filtered out. 
offset=-1 peeks the tail +# without advancing the global confirmation pointer — confirming via a positive +# offset would delete updates queued for other parallel runs of this bot. +SENTINEL=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-1&limit=1" | jq -r '.result[-1].update_id // 0') + +THREAD_ID=$(tg_resolve_topic_id "$TOPIC_NAME") + +SEND_RESPONSE=$(send_prompt "$THREAD_ID") +if [[ "$(echo "$SEND_RESPONSE" | jq -r '.ok')" != "true" ]]; then + DESC=$(echo "$SEND_RESPONSE" | jq -r '.description // ""') + # User may have closed or deleted the topic between runs — invalidate cache and recreate. + if tg_is_stale_thread_error "$DESC"; then + echo "Warning: cached topic $THREAD_ID no longer usable — recreating..." >&2 + tg_forget_topic "$TOPIC_NAME" + THREAD_ID=$(tg_resolve_topic_id "$TOPIC_NAME") + SEND_RESPONSE=$(send_prompt "$THREAD_ID") + fi fi - -# Send the prompt as a document attachment -SEND_RESPONSE=$(curl -s -X POST "${TELEGRAM_API}/sendDocument" \ - -F chat_id="$TELEGRAM_CHAT_ID" \ - -F document=@"$PROMPT_FILE;filename=review-prompt.md" \ - -F caption="Review prompt — reply with your feedback") - -SENT_OK=$(echo "$SEND_RESPONSE" | jq -r '.ok') -if [[ "$SENT_OK" != "true" ]]; then +if [[ "$(echo "$SEND_RESPONSE" | jq -r '.ok')" != "true" ]]; then echo "Error: Failed to send Telegram message: $SEND_RESPONSE" >&2 rm -f "$PROMPT_FILE" exit 1 fi - rm -f "$PROMPT_FILE" echo "" >&2 -echo "=== Prompt sent to Telegram ===" >&2 -echo "Reply in the Telegram chat with your feedback." >&2 +echo "=== Prompt sent to Telegram topic '$TOPIC_NAME' (thread $THREAD_ID) ===" >&2 +echo "Reply in that topic with your feedback." >&2 echo "Waiting for reply..." 
>&2 -# Long-poll for a reply, collecting split messages over a 10s window COLLECTED="" DEADLINE="" while true; do - # After first message arrives, switch to short polls and enforce deadline - if [[ -n "$DEADLINE" ]]; then - NOW=$(date +%s) - if [[ $NOW -ge $DEADLINE ]]; then - break - fi - POLL_TIMEOUT=2 - else - POLL_TIMEOUT=30 + # Negative offset = read tail without advancing the global confirmation + # pointer. Every parallel run sees the same updates and each filters for its + # own thread_id. We always poll once per iteration, then check the deadline, + # so the final poll covers the full 10s window (checking before polling + # would drop messages arriving in the last sleep interval). + UPDATES=$(curl -s "${TELEGRAM_API}/getUpdates?offset=-100&limit=100") + if [[ "$(echo "$UPDATES" | jq -r '.ok // false')" != "true" ]]; then + sleep 2 + continue fi - UPDATES=$(curl -s "${TELEGRAM_API}/getUpdates?offset=${OFFSET}&timeout=${POLL_TIMEOUT}") - - MSG_COUNT=$(echo "$UPDATES" | jq --arg cid "$TELEGRAM_CHAT_ID" ' - [.result[] | select(.message.chat.id == ($cid | tonumber) and .message.text != null)] | length + BATCH=$(echo "$UPDATES" | jq -r \ + --arg cid "$TELEGRAM_CHAT_ID" \ + --argjson thread "$THREAD_ID" \ + --argjson sentinel "$SENTINEL" ' + [.result[] + | select(.message.chat.id == ($cid|tonumber) + and .message.text != null + and (.message.message_thread_id // 0) == $thread + and .update_id > $sentinel)] + | map(.message.text) | join("\n") + ') + MAX_ID=$(echo "$UPDATES" | jq -r \ + --arg cid "$TELEGRAM_CHAT_ID" \ + --argjson thread "$THREAD_ID" \ + --argjson sentinel "$SENTINEL" ' + [.result[] + | select(.message.chat.id == ($cid|tonumber) + and .message.text != null + and (.message.message_thread_id // 0) == $thread + and .update_id > $sentinel) + | .update_id] + | max // 0 ') - if [[ "$MSG_COUNT" -gt 0 ]]; then - NEW_TEXTS=$(echo "$UPDATES" | jq -r --arg cid "$TELEGRAM_CHAT_ID" ' - [.result[] | select(.message.chat.id == ($cid | tonumber) and .message.text 
!= null)] - | .[].message.text - ') + if [[ -n "$BATCH" ]]; then if [[ -n "$COLLECTED" ]]; then COLLECTED="${COLLECTED} -${NEW_TEXTS}" +${BATCH}" else - COLLECTED="$NEW_TEXTS" + COLLECTED="$BATCH" + fi + if (( MAX_ID > SENTINEL )); then + SENTINEL=$MAX_ID fi - - # Start 10s collection window on first message if [[ -z "$DEADLINE" ]]; then DEADLINE=$(( $(date +%s) + 10 )) echo "=== First message received, collecting for 10s... ===" >&2 fi fi - # Advance offset past all updates - NEW_LAST=$(echo "$UPDATES" | jq -r '.result[-1].update_id // empty') - if [[ -n "$NEW_LAST" ]]; then - OFFSET=$((NEW_LAST + 1)) + if [[ -n "$DEADLINE" ]]; then + NOW=$(date +%s) + (( NOW >= DEADLINE )) && break fi -done -# Acknowledge all collected updates -curl -s "${TELEGRAM_API}/getUpdates?offset=${OFFSET}" > /dev/null + sleep 2 +done echo "$COLLECTED" > "$FEEDBACK_FILE" echo "=== Feedback received from Telegram ===" >&2 diff --git a/.loopx/shared/telegram-lib.sh b/.loopx/shared/telegram-lib.sh new file mode 100644 index 0000000..cd17d49 --- /dev/null +++ b/.loopx/shared/telegram-lib.sh @@ -0,0 +1,113 @@ +# Shared telegram helpers. Source (don't exec) this file from shared/ scripts +# that need to resolve per-run forum topics, post into them, or emit workflow- +# labeled alerts to General. +# +# Required env: TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID, LOOPX_PROJECT_ROOT, LOOPX_WORKFLOW. +# Exports: TELEGRAM_API plus the tg_* functions below. + +: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN env var is required}" +: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID env var is required}" +: "${LOOPX_PROJECT_ROOT:?LOOPX_PROJECT_ROOT env var is required}" +: "${LOOPX_WORKFLOW:?LOOPX_WORKFLOW env var is required}" + +TELEGRAM_API="https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}" + +_TG_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# The caller workflow that owns this review cycle. 
When shared/ scripts run as +# LOOPX_WORKFLOW=shared (via shared:dispatch/check-question/apply-answer), the +# caller's name lives in .caller.tmp, written by the caller's index.sh. When +# running directly under a caller workflow, LOOPX_WORKFLOW is already the +# caller. +tg_caller_workflow() { + local caller_file="$LOOPX_PROJECT_ROOT/.loopx/$LOOPX_WORKFLOW/.caller.tmp" + if [[ -s "$caller_file" ]]; then + tr -d '\n' < "$caller_file" + else + printf '%s' "$LOOPX_WORKFLOW" + fi +} + +# Forum topic name: " / [ / ADR-NNNN ]". One topic per (repo, +# caller, optional ADR) so parallel runs in different repos or on different +# ADRs don't collide. +tg_topic_name() { + local owner name padded resolved + owner=$(tg_caller_workflow) + name="$(basename "$LOOPX_PROJECT_ROOT") / $owner" + if [[ -n "${ADR:-}" ]]; then + if resolved=$("$_TG_LIB_DIR/resolve-adr.sh" 2>/dev/null); then + padded=$(printf '%s' "$resolved" | cut -f1) + name="$name / ADR-$padded" + fi + fi + printf '%s' "$name" +} + +# Short label for alerts routed to General so it's obvious which run is +# speaking: " / [ / ADR-NNNN]". +tg_alert_label() { + tg_topic_name +} + +_TG_TOKEN_HASH=$(printf '%s' "$TELEGRAM_BOT_TOKEN" | sha256sum | cut -c1-12) +_TG_CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/loopx-telegram" +mkdir -p "$_TG_CACHE_DIR" +_TG_CACHE_FILE="$_TG_CACHE_DIR/topics-$_TG_TOKEN_HASH.json" +_TG_LOCK_FILE="$_TG_CACHE_DIR/topics-$_TG_TOKEN_HASH.lock" + +# Resolve (and if needed create) the message_thread_id for a topic name. Echoes +# the numeric id on stdout. Flocked so concurrent first-runs don't double-create +# the same topic. 
+tg_resolve_topic_id() { + local topic="$1" + local cache_key="${TELEGRAM_CHAT_ID}|${topic}" + ( + exec 9>"$_TG_LOCK_FILE" + flock -x 9 + [[ -f "$_TG_CACHE_FILE" ]] || echo '{}' > "$_TG_CACHE_FILE" + local id resp + id=$(jq -r --arg k "$cache_key" '.[$k] // empty' "$_TG_CACHE_FILE") + if [[ -z "$id" ]]; then + resp=$(curl -s -X POST "${TELEGRAM_API}/createForumTopic" \ + --data-urlencode "chat_id=$TELEGRAM_CHAT_ID" \ + --data-urlencode "name=$topic") + if [[ "$(echo "$resp" | jq -r '.ok')" != "true" ]]; then + echo "Error: failed to create Telegram topic '$topic': $resp" >&2 + echo "Hint: TELEGRAM_CHAT_ID must point to a forum-enabled supergroup, and the bot must have 'Manage Topics' admin rights." >&2 + exit 1 + fi + id=$(echo "$resp" | jq -r '.result.message_thread_id // empty') + if [[ -z "$id" ]]; then + echo "Error: createForumTopic returned ok but no message_thread_id: $resp" >&2 + exit 1 + fi + jq --arg k "$cache_key" --argjson id "$id" '. + {($k): $id}' "$_TG_CACHE_FILE" > "$_TG_CACHE_FILE.tmp" + mv "$_TG_CACHE_FILE.tmp" "$_TG_CACHE_FILE" + fi + printf '%s' "$id" + ) +} + +# Drop a topic from the cache so the next tg_resolve_topic_id recreates it. +# Call this when a send fails with "thread not found" / closed / deleted. +tg_forget_topic() { + local topic="$1" + local cache_key="${TELEGRAM_CHAT_ID}|${topic}" + ( + exec 9>"$_TG_LOCK_FILE" + flock -x 9 + [[ -f "$_TG_CACHE_FILE" ]] || exit 0 + jq --arg k "$cache_key" 'del(.[$k])' "$_TG_CACHE_FILE" > "$_TG_CACHE_FILE.tmp" + mv "$_TG_CACHE_FILE.tmp" "$_TG_CACHE_FILE" + ) +} + +# True if a sendMessage/sendDocument error description indicates the cached +# thread_id is no longer usable and should be recreated. 
+tg_is_stale_thread_error() { + local desc="$1" + [[ "$desc" == *"thread not found"* ]] \ + || [[ "$desc" == *"TOPIC_DELETED"* ]] \ + || [[ "$desc" == *"topic closed"* ]] +} diff --git a/.loopx/spec-test-adr/apply-feedback.sh b/.loopx/spec-test-adr/apply-feedback.sh index af7ac81..3c39219 100755 --- a/.loopx/spec-test-adr/apply-feedback.sh +++ b/.loopx/spec-test-adr/apply-feedback.sh @@ -5,6 +5,7 @@ ROOT="$LOOPX_PROJECT_ROOT" SHARED_DIR="$ROOT/.loopx/shared" FEEDBACK_FILE="$SHARED_DIR/.feedback.tmp" CLAUDE_OUTPUT_FILE="$SHARED_DIR/.claude-output.tmp" +SESSION_FILE="$SHARED_DIR/.session.tmp" if [[ ! -f "$FEEDBACK_FILE" ]]; then echo "Error: No feedback file found at $FEEDBACK_FILE" >&2 @@ -13,25 +14,19 @@ fi FEEDBACK=$(cat "$FEEDBACK_FILE") -ADR_0002=$(cat "$ROOT/adr/0002-run-subcommand.md") -SPEC=$(cat "$ROOT/SPEC.md") -TEST_SPEC=$(cat "$ROOT/TEST-SPEC.md") +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" -PROMPT="ADR-0002 has been accepted and SPEC.md has already been updated to incorporate its changes (per the ADR process in ADR-0001). I am now updating TEST-SPEC.md to cover the new and changed spec behavior introduced by ADR-0002. I received the following feedback on the current state of TEST-SPEC.md. Apply this feedback by updating TEST-SPEC.md only — do not modify SPEC.md or ADR-0002, they are read-only references in this cycle. ADR-0002 and the updated SPEC.md are the authoritative sources for what TEST-SPEC.md should cover. If there is any ambiguity about my intentions, ask me clarifying questions. Think critically about this feedback and push back if warranted. After you finish, commit and push. 
- -adr/0002-run-subcommand.md (accepted — read-only reference): -$ADR_0002 - -SPEC.md (already updated for ADR-0002 — read-only reference): -$SPEC - -Current TEST-SPEC.md (target of updates): -$TEST_SPEC +PROMPT="ADR-$ADR_NUM ($ADR_REL) has been accepted and SPEC.md has already been updated to incorporate its changes (per the ADR process in adr/0001-adr-process.md). I am now updating TEST-SPEC.md to cover the new and changed spec behavior introduced by ADR-$ADR_NUM. I received the following feedback on the current state of TEST-SPEC.md. Apply this feedback by updating TEST-SPEC.md only — do not modify SPEC.md or ADR-$ADR_NUM, they are read-only references in this cycle. ADR-$ADR_NUM and the updated SPEC.md are the authoritative sources for what TEST-SPEC.md should cover. If there is any ambiguity about my intentions, ask me clarifying questions. When you ask questions, ask only one at a time and wait for my answer before asking the next — do not batch multiple questions together. I have not read the feedback, I only pasted it in, so phrase each question to stand on its own: include the relevant context or quote from the feedback so I can answer without having to go read it. Think critically about this feedback and push back if warranted. After you finish, commit and push. 
Feedback: $FEEDBACK" -CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions -p 2>/dev/null) +SESSION_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') +echo "$SESSION_ID" > "$SESSION_FILE" + +CLAUDE_OUTPUT=$(echo "$PROMPT" | claude --dangerously-skip-permissions --session-id "$SESSION_ID" -p 2>/dev/null) rm -f "$FEEDBACK_FILE" echo "$CLAUDE_OUTPUT" > "$CLAUDE_OUTPUT_FILE" diff --git a/.loopx/spec-test-adr/index.sh b/.loopx/spec-test-adr/index.sh index cc4f82c..595c73b 100755 --- a/.loopx/spec-test-adr/index.sh +++ b/.loopx/spec-test-adr/index.sh @@ -3,7 +3,6 @@ set -euo pipefail ROOT="$LOOPX_PROJECT_ROOT" ADR_0001="$ROOT/adr/0001-adr-process.md" -ADR_0002="$ROOT/adr/0002-run-subcommand.md" SPEC="$ROOT/SPEC.md" TEST_SPEC="$ROOT/TEST-SPEC.md" SHARED_DIR="$ROOT/.loopx/shared" @@ -20,11 +19,6 @@ if [[ ! -f "$ADR_0001" ]]; then exit 1 fi -if [[ ! -f "$ADR_0002" ]]; then - echo "Error: adr/0002-run-subcommand.md not found" >&2 - exit 1 -fi - if [[ ! -f "$SPEC" ]]; then echo "Error: SPEC.md not found" >&2 exit 1 @@ -35,17 +29,21 @@ if [[ ! -f "$TEST_SPEC" ]]; then exit 1 fi +RESOLVED=$("$SHARED_DIR/resolve-adr.sh") +IFS=$'\t' read -r ADR_NUM ADR_FILE <<< "$RESOLVED" +ADR_REL="adr/$(basename "$ADR_FILE")" + echo "$LOOPX_WORKFLOW" > "$CALLER_FILE" cat < "$PROMPT_FILE" -ADR 0002 has been accepted and SPEC.md has already been updated to incorporate its changes (ADR status: "Spec Updated"). Per the ADR process in ADR-0001, the next step is to update TEST-SPEC.md to cover the new and changed spec behavior introduced by ADR-0002. In this cycle, TEST-SPEC.md is the only file that should be modified — SPEC.md and ADR-0002 are read-only references. +ADR $ADR_NUM has been accepted and SPEC.md has already been updated to incorporate its changes (ADR status: "Spec Updated"). Per the ADR process in ADR-0001, the next step is to update TEST-SPEC.md to cover the new and changed spec behavior introduced by ADR-$ADR_NUM. 
In this cycle, TEST-SPEC.md is the only file that should be modified — SPEC.md and ADR-$ADR_NUM are read-only references. -Review the current TEST-SPEC.md against the updated SPEC.md and ADR-0002, and let me know whether TEST-SPEC.md already covers the ADR-0002 changes correctly and completely, or what needs to be added, changed, or removed. Do not suggest changes to SPEC.md or ADR-0002 — if something looks wrong in those, flag it but do not act on it. +Review the current TEST-SPEC.md against the updated SPEC.md and ADR-$ADR_NUM, and let me know whether TEST-SPEC.md already covers the ADR-$ADR_NUM changes correctly and completely, or what needs to be added, changed, or removed. Do not suggest changes to SPEC.md or ADR-$ADR_NUM — if something looks wrong in those, flag it but do not act on it. -adr/0002-run-subcommand.md (accepted — read-only reference): -$(cat "$ADR_0002") +$ADR_REL (accepted — read-only reference): +$(cat "$ADR_FILE") -SPEC.md (already updated for ADR-0002 — read-only reference): +SPEC.md (already updated for ADR-$ADR_NUM — read-only reference): $(cat "$SPEC") TEST-SPEC.md (target of updates): diff --git a/SPEC-PROBLEMS.md b/SPEC-PROBLEMS.md index 96ae7b7..6d3658b 100644 --- a/SPEC-PROBLEMS.md +++ b/SPEC-PROBLEMS.md @@ -1,13 +1,51 @@ -## T-INST-GLOBAL-01a: Bun global install `import "loopx"` resolution +# SPEC Problems -The npm package is named `loop-extender` (not `loopx`). For local installs, a symlink `node_modules/loopx → dist/` makes `import "loopx"` work via NODE_PATH. For global installs (`npm install -g`), the package is installed as `/lib/node_modules/loop-extender/` — there is no `loopx` symlink. +This file tracks ambiguities, gaps, or under-specified clauses in SPEC.md that prevent TEST-SPEC.md from covering the behavior cleanly. Each entry is scoped to a specific ADR cycle. Resolve in a follow-up cycle by updating SPEC.md and removing the entry; delete the file when no entries remain. 
-Under Node.js, the custom module loader (`--import` with `module.register()`) intercepts the bare specifier `"loopx"` regardless of directory names. Under Bun, the only resolution mechanism is NODE_PATH, which requires a directory named `loopx` somewhere in the search path. +## ADR-0004 -**Impact**: T-INST-GLOBAL-01a (Bun global install with `import { output } from "loopx"`) cannot work until the package is either renamed to `loopx` or a postinstall hook creates a `loopx` symlink in the global node_modules. The test currently uses a bash script instead of a TS script with imports. +### P-0004-01 — Abort-after-final-yield vs. already-observed consumer `.return()` / `.throw()` before settlement -**Resolution**: Rename the npm package to `loopx`, or add a `postinstall` script that creates a symlink from the package directory to a `loopx` entry in the parent node_modules directory. +**Where.** SPEC §9.3 ("Abort after final yield") and §7.2 ("Terminal-outcome precedence"). ADR-0004 §"Programmatic API · Termination" line "Abort after the final yielded `Output` still wins until generator settlement." -## T-PARSE-20a: Ambiguous spec wording about "first iteration" +**Problem.** SPEC §9.3 says: "An `AbortSignal` that aborts after the final `Output` has been yielded ... but before the generator settles via `{ done: true }` produces the abort error on the next generator interaction — `g.next()`, `.return()`, or `.throw()`." This text is clear when the abort is observed *before* the next consumer interaction begins. It is not clear what wins when the consumer has *already* invoked `.return()` / `.throw()` after the final yield, loopx has observed that interaction first and entered the cleanup routine, and abort then arrives during cleanup before the generator settles. -TEST-SPEC says: "the generator throws an error on the first iteration." 
This is misleading — the generator yields the first iteration's Output (which contains `{goto: ""}`) and then throws on the *next* `.next()` call when it tries to resolve the empty goto target. The `iterationCount` is 1 (one yield) before the error, not 0. The spec wording should say "throws an error after the first iteration yields" or "throws during goto resolution following the first iteration." +Two readings are reasonable: + +1. **§9.3 is an explicit precedence rule that holds until settlement.** Abort after final yield wins over any non-settlement outcome until `{ done: true }`, including an already-observed but not-yet-settled consumer `.return()` / `.throw()`. ADR-0004's phrasing — "abort still wins until generator settlement" — supports this reading. + +2. **§9.3 only applies when abort precedes the next interaction.** Once a consumer interaction has been observed first, the §7.2 first-observed-wins residual rule applies and the consumer interaction's outcome (silent clean completion under §9.1's no-active-child swallow rule) survives a later abort that races during cleanup. + +The SPEC text alone does not disambiguate. + +**Impact on TEST-SPEC.** T-TMP-38d2 (post-final-yield consumer `.throw()` observed first × racing abort during cleanup) currently picks reading 2 and pins the surfaced outcome to `{ done: true }`. Until SPEC clarifies, the surfaced-outcome assertion is contested; T-TMP-38d2 has been relaxed to assert only the cleanup-idempotence and warning-cardinality contract that *is* pinned by SPEC §7.2. Symmetric concerns apply to a corresponding `.return()`-first × abort-second post-final-yield variant, should one be added. 
+ +**Resolution requested.** Either: +- Amend §9.3 to state explicitly that the abort-after-final-yield precedence rule holds until settlement and displaces any already-observed but not-yet-settled consumer `.return()` / `.throw()` outcome, or +- Amend §9.3 / §7.2 to state explicitly that once a post-final-yield `.return()` / `.throw()` is observed first, later aborts before settlement fall back to the §7.2 first-observed-wins residual rule. + +Either resolution unblocks pinning the surfaced-outcome axis for T-TMP-38d2 (and any symmetric `.return()` variant). + +--- + +### P-0004-02 — Outer `RunOptions` inherited-field semantics + +**Where.** SPEC §9.5 (`RunOptions` shape), §9.1 / §9.2 ("Option-snapshot timing"). ADR-0004 §"Programmatic API · `RunOptions.env`". + +**Problem.** SPEC §9.5 explicitly specifies own-enumerable-only semantics for `options.env` ("Symbol-keyed, non-enumerable, and inherited properties are ignored"). It is silent on whether the *outer* `options` object's recognized fields (`signal`, `cwd`, `envFile`, `maxIterations`, `env`) are read via ordinary JS property access (which honors inherited properties) or via own-enumerable-only access matching the inner `env` rule. + +Existing behavior implied by adjacent rules pulls in opposite directions: + +- The duck-typed signal compatibility check honors prototype-inherited `aborted` / `addEventListener` (T-API-64n / T-API-64n2 already pin this), suggesting ordinary property access is the convention. +- SPEC §9.5's own-enumerable-only rule for `env` is *explicit*, suggesting the outer options would have been similarly explicit if the same restriction applied. + +But the SPEC does not state which rule applies for the outer options' fields, and TEST-SPEC has no direct coverage of prototype-inherited `cwd` / `envFile` / `maxIterations` / `env` / `signal` on the outer options object. 
+ +**Impact on TEST-SPEC.** No tests can be added for the inherited-field semantics on the outer options without picking an interpretation. T-API-61i / T-API-61k cover null-prototype and class-instance options *with own properties*; T-API-61l covers `Map` (no recognized field names appear as own properties). None test inherited fields on the outer options. + +**Resolution requested.** Amend SPEC §9.5 (or the §9.1 / §9.2 option-snapshot-timing paragraphs) to state explicitly whether the outer `options` object's recognized fields are read via: + +1. Ordinary JS property access (honoring inherited fields), matching the duck-typed signal precedent, or +2. Own-enumerable-only access, matching the inner `options.env` rule. + +Either resolution unblocks adding the corresponding test coverage (inherited `maxIterations` / `cwd` / `envFile` / `signal` / `env` honored or ignored). diff --git a/SPEC.md b/SPEC.md index 517af3d..15a62a2 100644 --- a/SPEC.md +++ b/SPEC.md @@ -78,14 +78,14 @@ Each workflow has a **default entry point**: a script named `index` (i.e., `inde A workflow may include a `package.json` that serves two optional purposes: -1. **Dependency management:** The workflow can declare its own dependencies. Users manage installation themselves (`npm install` / `bun install` within the workflow directory). loopx does not auto-install dependencies. If `node_modules/` is missing and the script fails to import a package, the resulting error is the active runtime's normal module resolution error. +1. **Dependency management:** The workflow can declare its own dependencies. `loopx install` auto-runs `npm install` in each committed workflow that contains a top-level `package.json` (see section 10.10), so a workflow's `node_modules/` is normally populated by the time `loopx run` first spawns one of its scripts. The presence of a top-level `package.json` is the sole trigger — whether it declares any dependencies is not inspected. 
The `--no-install` flag on `loopx install` suppresses the install-time step for users who prefer manual control or a non-npm package manager. At runtime, loopx does **not** re-install dependencies — `loopx run` does not invoke `npm install` on a missing `node_modules/`. If `node_modules/` is missing and a script fails to import a package, the resulting error is the active runtime's normal module resolution error. 2. **Version declaration:** The workflow can declare a `loopx` version requirement (see section 3.2). The `main` field is no longer used to determine the entry point. The entry point is always the `index` script by convention. If a `package.json` contains a `main` field, it is ignored by loopx. The `type` field (`"module"`) continues to be relevant for Node.js module resolution within the workflow. -**Failure modes:** If a workflow's `package.json` is absent, unreadable, contains invalid JSON, or declares an invalid semver range for `loopx`, see section 3.2 for the defined behavior. In all cases, a broken `package.json` degrades version checking but does not prevent the workflow from being used or installed. +**Failure modes:** If a workflow's `package.json` is absent, unreadable, contains invalid JSON, or declares an invalid semver range for `loopx`, see section 3.2 for the defined behavior. A broken `package.json` degrades version checking but does not prevent the workflow from being used or installed; an unreadable `package.json`, invalid JSON, or an invalid `loopx` semver range also causes `loopx install`'s auto-install step to skip that workflow (section 10.10). ### 2.2 Loop @@ -190,7 +190,14 @@ A local install provides two guarantees: #### Project root -For loopx, the **project root** is always the invocation cwd. This is the same directory where `.loopx/` lives (when it exists), but the project root is determined by cwd alone — it does not depend on `.loopx/` existing. 
This means delegation, version pinning, and all project-root-relative behavior work regardless of whether `.loopx/` has been initialized. +For loopx, the **project root** depends on the invocation path: under the CLI it is `process.cwd()` at invocation, and under the programmatic API it is the resolved `RunOptions.cwd` when supplied, otherwise `process.cwd()` at call time. This is the same directory where `.loopx/` lives (when it exists), but the project root is determined by those rules alone — it does not depend on `.loopx/` existing. This means delegation, version pinning, and all project-root-relative behavior work regardless of whether `.loopx/` has been initialized. + +**Project-root resolution.** Two paths set the project root: + +- **CLI:** `LOOPX_PROJECT_ROOT` is exactly the string returned by loopx's own `process.cwd()` at invocation. loopx does not consult `$PWD` and does not apply `realpath` / canonicalization beyond what the runtime already provides. On POSIX systems where `getcwd(3)` canonicalizes, a CLI invocation from a symlinked directory yields a canonical `LOOPX_PROJECT_ROOT`. +- **Programmatic:** `LOOPX_PROJECT_ROOT` is `RunOptions.cwd` if supplied, otherwise `process.cwd()` at call time. A relative `RunOptions.cwd` is resolved via `path.resolve(process.cwd(), options.cwd)` once at call time; an absolute value is used unchanged. `path.resolve` is a pure string operation — symlinked components in `RunOptions.cwd` are preserved verbatim. No further `realpath` is applied; callers who need a canonical realpath compute one themselves. + +The project root is also the script execution cwd for every spawned script (see section 6.1) and the root beneath which `.loopx/` workflow discovery runs; the discovery-time workflow paths cached during that step are what loopx later injects as `LOOPX_WORKFLOW_DIR` (see section 6.1 and section 8). The injected `LOOPX_WORKFLOW_DIR` is read from those cached paths, not recomposed from project-root pieces. 
For `RunOptions.cwd` type and validation rules, see section 9.5. #### Resolution order (highest precedence first) @@ -260,7 +267,11 @@ Scripts spawned by loopx need access to the `output` and `input` helpers via `im **For Bun:** Bun's module resolver supports `NODE_PATH` for both CJS and ESM. loopx sets `NODE_PATH` to include its own package directory when running under Bun. -In both cases, the resolution **points to the post-delegation version** when no closer `node_modules/loopx` exists. If a local install triggered delegation, the helpers resolve to the local version's package. However, if a workflow has its own `node_modules/loopx`, standard module resolution applies and the closer package takes precedence over the CLI-provided one. This is a natural consequence of running scripts with the workflow directory as cwd (section 6.1). +**Resolution precedence.** Standard file-relative resolution of `"loopx"` from the importing module's path always wins; the CLI-provided fallback applies only when no `node_modules/loopx` is found by standard resolution. This precedence is independent of cwd — Node ESM, tsx, and Bun resolve bare specifiers and relative imports from the importing module's file path, not from `process.cwd()`. The custom Node/tsx resolve hook installed via `module.register()` must preserve standard precedence: it must first attempt standard file-relative resolution (via `defaultResolve` delegation or an equivalent file-path walk-up) and fall back to the CLI package exports only when standard resolution finds no `node_modules/loopx`. + +In practice, when no closer `node_modules/loopx` exists, resolution **points to the post-delegation version** — if a local install triggered delegation, the helpers resolve to the local version's package; otherwise they resolve to the global install. 
When standard resolution sees a closer `node_modules/loopx`, it wins and the CLI fallback does not run; however, under symlinked JS/TS entry paths, Node's main-module realpath behavior may prevent a workflow-local `node_modules/loopx` on the logical path from being seen (see the symlink caveat below). + +**Symlink caveat for JS/TS entry paths.** Node's default main-module handling applies `realpath` to the main module's file path. When the absolute discovery-time entry path traverses any symlink (workflow directory entry, entry script file, or intermediate ancestor), the resolver walks up from the canonical target rather than the symlink-preserving spelling, so workflow-local `node_modules/loopx` on the logical path is not guaranteed to win even with a correctly delegating hook. loopx does **not** override standard module resolution to force the CLI version. This means a workflow with a locally installed `loopx` may get different helper behavior than the running CLI provides. The workflow's `package.json` version declaration (section 3.2) serves as the intended mechanism for surfacing version mismatches. No warning is emitted for this scenario in v1. @@ -291,7 +302,9 @@ Scripts are executed exclusively via the `run` subcommand. `run` accepts exactly - `loopx run ralph:check-ready` runs the `check-ready` script in the `ralph` workflow. - `loopx run ralph:index` explicitly runs the `index` script (same as bare `loopx run ralph`). - The target is required. `loopx run` with no target (e.g., `loopx run` or `loopx run -n 5`) is a usage error (exit code 1). This does not inspect `.loopx/` or perform discovery. -- More than one positional argument (e.g., `loopx run ralph bar`) is a usage error (exit code 1). +- More than one positional argument (e.g., `loopx run ralph bar`) is a usage error (exit code 1). 
A `name=value` token in a second positional position is rejected on the same "more than one positional" rule — `loopx run ralph adr=0003` is a usage error, not a named-argument syntax. +- A sole positional `name=value` (e.g., `loopx run adr=0003`) is parsed as a target string. The string contains `=`, which is not in the `[a-zA-Z0-9_][a-zA-Z0-9_-]*` pattern, so target-syntax validation rejects it. The surfaced CLI failure still follows the normal pre-iteration ordering in section 7.1 — for example, `.loopx/` discovery may fail first if the directory is missing, in which case the discovery error is what the user sees. +- `--` is **not** an end-of-options marker for `run`. Outside the `-h` / `--help` short-circuit, `--` in any position is rejected as an unrecognized flag / usage error under the existing rejection rules. The sole CLI surface for per-run values is the shell env prefix (`key=value loopx run <target>`), which flows through inherited `process.env` at the section 8.3 inherited-env tier. - If the workflow does not exist in `.loopx/`, loopx exits with an error. If the workflow exists but the specified script does not, loopx exits with an error. - If a workflow has no `index` script and is invoked without specifying a script (e.g., `loopx run ralph`), loopx exits with an error (exit code 1). The workflow is still valid — its scripts can be targeted explicitly. - `loopx` with no arguments shows top-level help (equivalent to `loopx -h`). No discovery is performed. @@ -314,7 +327,7 @@ The following target strings are invalid in all contexts — CLI invocation (`lo Existing structured-output parsing semantics are unchanged: if `goto` is present but not a string, it is treated as absent. Target validation applies only after a `goto` value has been parsed as a string. -For CLI invocation and the programmatic API, invalid targets are rejected at the same point as a missing workflow (after discovery, or lazily on first iteration for the API).
For `goto` values, invalid targets produce an error at transition time (exit code 1). +For the CLI, invalid targets are rejected at the same point as a missing workflow (after discovery), under the pre-iteration ordering in section 7.1. For the programmatic API, invalid targets surface lazily on first iteration (`run()`) or promise rejection (`runPromise()`), on the pre-iteration error path; relative ordering against other pre-iteration failures is implementation-defined except for the pinned option-error and abort-precedence rules in section 9.3. For `goto` values, invalid targets produce an error at transition time (exit code 1). ### 4.2 Options @@ -351,6 +364,8 @@ Within `run`, options and the target may appear in any order. - Target requirements are suppressed (zero or multiple positionals are not errors). - `-n` and `-e` values are not parsed or validated (including duplicates and invalid values). - Unknown flags are ignored. +- `--` in any position is ignored (not rejected as the unrecognized flag it would otherwise be under section 4.1). +- `name=value` positional tokens in any position are ignored (not rejected as extra positionals or as invalid target strings). - Examples: - `loopx run -h ralph` — shows run help (target ignored). - `loopx run ralph -h` — shows run help (`-h` after target still triggers help short-circuit). @@ -360,6 +375,8 @@ Within `run`, options and the target may appear in any order. - `loopx run -h -n 5 -n 10` — shows run help (duplicate `-n` not rejected). - `loopx run -h foo bar` — shows run help (extra positional not rejected). - `loopx run -h --unknown` — shows run help (unknown flag not rejected). + - `loopx run -h -- ralph` — shows run help (`--` not rejected). + - `loopx run -h ralph adr=0003` — shows run help (`name=value` token not rejected). #### `install`-scoped options @@ -367,13 +384,14 @@ Within `run`, options and the target may appear in any order. 
|------|-------------| | `-w <name>`, `--workflow <name>` | Install only the named workflow from a multi-workflow source (see section 10.8). | | `-y` | Override version mismatch and workflow collision checks (see sections 10.5 and 10.6). | +| `--no-install` | Suppress auto-install of workflow dependencies (see section 10.10). When present, `loopx install` commits workflow files but does not run `npm install` and does not synthesize the `.gitignore` safeguard for any selected workflow. No short form. | | `-h`, `--help` | Print install help and exit. | -**Duplicate flags:** Repeating `-w` or `-y` is a usage error (exit code 1) — unless `-h` / `--help` is present. +**Duplicate flags:** Repeating `-w`, `-y`, or `--no-install` is a usage error (exit code 1) — unless `-h` / `--help` is present. **Unrecognized flags:** Unrecognized flags (e.g., `loopx install --unknown <source>`) are usage errors (exit code 1) — unless `-h` / `--help` is present. -**`install -h` / `--help` short-circuit:** When `-h` / `--help` is present, loopx shows install help, exits 0, and ignores all other install-level arguments unconditionally. Source is not required, flags are not validated, and no network requests are made. +**`install -h` / `--help` short-circuit:** When `-h` / `--help` is present, loopx shows install help, exits 0, and ignores all other install-level arguments unconditionally. Source is not required, flags are not validated (including duplicate or extraneous `--no-install`), and no network requests are made. ### 4.3 Subcommands @@ -469,13 +487,13 @@ Legacy layouts are not recognized: loose script files placed directly in `.loopx - Subdirectories within a workflow are ignored. - Name collisions (e.g., `check.sh` and `check.ts` in the same workflow) follow the rules in section 5.2. -**Symlink policy:** Symlinks within `.loopx/` are followed during discovery. A symlinked workflow directory or script file is treated identically to its non-symlinked equivalent.
Symlink resolution does not affect workflow or script naming — names are derived from the symlink's own name, not its target. +**Symlink policy:** Symlinks within `.loopx/` are followed during discovery to determine entry types — a symlinked `.loopx` directory, a symlinked `.loopx/<workflow>` directory, and a symlinked entry script file are all treated identically to their non-symlinked equivalents for purposes of "is this a workflow / script?". The discovered path spelling is preserved (no `realpath` / canonicalization is applied), and that preserved spelling is what loopx caches for spawn-time invocation, `LOOPX_WORKFLOW_DIR` injection (section 6.1), and Bash `$0` derivation (section 6.2). Symlink resolution does not affect workflow or script naming — names are derived from the symlink's own name, not its target. **Discovery metadata is cached at loop start for the duration of the loop.** This means: - Workflows and scripts added, removed, or renamed during loop execution are not detected until the next invocation. - **Edits to the contents of an already-discovered script file take effect on subsequent iterations**, because the child process reads the file from disk each time it is spawned. -- **If a discovered script's underlying file is removed or renamed mid-loop**, execution uses the cached entry path and fails at spawn time as a normal child-process launch error. This is treated as a non-zero exit (section 7.2). +- **If a discovered script's underlying file is removed or renamed mid-loop**, execution uses the cached entry path and fails at spawn time. This is a child launch / spawn failure under section 7.2 (not a non-zero exit, since the child never starts running user code). Discovery runs at loop start for `loopx run <target>` and during `loopx run -h`. Discovery does **not** run for top-level help (`loopx -h` / `loopx --help` / bare `loopx`).
@@ -522,14 +540,35 @@ Not all commands require `.loopx/` to exist or be valid: ### 6.1 Working Directory -All scripts run with the **workflow directory** as their working directory (e.g., `.loopx/ralph/`). This ensures relative imports and `node_modules/` resolve naturally. +All scripts run with **`LOOPX_PROJECT_ROOT` as their working directory** (see section 3.2). This applies to every script in the run — starting target, intra- and cross-workflow `goto` destinations, and loop resets all spawn at the same project-root cwd. Scripts remain free to `cd` (Bash) or `process.chdir()` (JS/TS) within the child process; such changes are scoped to that child and do not affect later spawns, which always start at project-root cwd. + +**Invocation path.** loopx invokes each script using its **absolute discovery-time path** (e.g., `bash /project/.loopx/review-adr/index.sh`). loopx does not canonicalize that path; the spelling preserved at discovery time (per section 5.1's symlink policy) is the spelling used at spawn time and is the basis for `LOOPX_WORKFLOW_DIR` (see below). + +**`LOOPX_PROJECT_ROOT` injection.** loopx injects `LOOPX_PROJECT_ROOT` into every script's environment, set to the absolute path of the project root (see section 3.2; for the programmatic API, this is `RunOptions.cwd` if provided — see section 9.5). Because cwd is already the project root, the variable is redundant for project-root-relative reads in the child's own working directory, but it remains injected for scripts that `cd` elsewhere or hand paths to tools that don't inherit cwd. + +**`LOOPX_WORKFLOW_DIR` injection.** loopx injects `LOOPX_WORKFLOW_DIR` into every script's environment, set to the absolute path of the workflow directory containing the currently-spawned script — specifically, the directory portion of the cached absolute discovery-time script path. 
In the v1 layout this normally corresponds to `"$LOOPX_PROJECT_ROOT/.loopx/$LOOPX_WORKFLOW"`, but the injected form is derived from the cached discovery path rather than recomposed from those components, so it remains consistent with the spawn path under trailing-slash variations, symlinked `.loopx/` entries, and any lexical normalization the implementation applies during discovery. The injected form is authoritative — scripts should prefer it over re-deriving the path. + +`LOOPX_WORKFLOW_DIR` tracks the currently-spawned script's containing workflow and refreshes per-spawn alongside `LOOPX_WORKFLOW`: starting-target spawn, intra-workflow `goto`, cross-workflow `goto` (A → B sets `LOOPX_WORKFLOW_DIR` to B's directory before the B-side spawn; deeper chains A → B → C each observe their own workflow's directory), and loop reset. Within a single script execution the value is stable — a top-level workflow script and a sibling helper it sources or imports both read the same `LOOPX_WORKFLOW_DIR` because the env var is injected once per child spawn and inherited by the whole process. + +**Symlink behavior.** `LOOPX_WORKFLOW_DIR` is derived from the discovery-time path, not via `realpath`. A symlinked `.loopx/<workflow>` directory entry, a symlinked `.loopx` directory, a symlinked entry script file, or a symlinked project-root ancestor (via `RunOptions.cwd`) all preserve their discovered spelling in `LOOPX_WORKFLOW_DIR`. + +**Directory identity vs. string spelling.** loopx controls the child's *effective* working directory by spawning with `LOOPX_PROJECT_ROOT` as cwd (same device/inode as `LOOPX_PROJECT_ROOT`) but does not control the *string spelling* that runtime cwd APIs report.
The `LOOPX_PROJECT_ROOT` spelling itself depends on how the project root was supplied (section 3.2): -loopx injects `LOOPX_PROJECT_ROOT` into every script's environment, set to the absolute path of the project root (see section 3.2; for the programmatic API, this is `RunOptions.cwd` if provided — see section 9.5). This is essential for scripts that need to reference project files outside their workflow directory. +- **CLI:** `LOOPX_PROJECT_ROOT` is exactly the string returned by loopx's own `process.cwd()` at invocation. On POSIX systems where `getcwd(3)` canonicalizes, this is already a canonical path, and `LOOPX_PROJECT_ROOT` does **not** preserve a symlinked spelling under the CLI. +- **Programmatic:** `LOOPX_PROJECT_ROOT` is `RunOptions.cwd` (resolved lexically via `path.resolve` with no `realpath`) or `process.cwd()` at call time. A symlink-preserving spelling supplied via `RunOptions.cwd` is preserved verbatim in `LOOPX_PROJECT_ROOT`; the `process.cwd()` fallback follows the same canonicalization characteristics as the CLI case. + +In either regime the child's effective cwd has the same device/inode as `LOOPX_PROJECT_ROOT`, but the *string spelling* that runtime cwd APIs report inside the child (e.g., Node/Bun's `process.cwd()`, which typically canonicalizes via `getcwd(3)`) is produced by the runtime and may differ from the spelling loopx used at spawn. Bash `$PWD` and bare `pwd` are shell-maintained and inherit loopx's own `PWD`, so byte-for-byte equality between Bash `$PWD` / bare `pwd` and `LOOPX_PROJECT_ROOT` is not a loopx guarantee in any regime. Scripts that need the loopx-chosen spelling read `LOOPX_PROJECT_ROOT`; scripts that need the kernel cwd spelling use `process.cwd()` (JS/TS) or `pwd -P` / `/bin/pwd` (Bash); scripts that need filesystem identity use `stat(".")`. + +**`PWD` is not a protocol variable.** loopx neither sets nor unsets `PWD` in the child environment; whatever value loopx's own process holds is inherited unchanged. 
A caller who wants a specific `PWD` in the child passes it via `RunOptions.env` (section 9.5) or inherited env, subject to Bash's standard POSIX inheritance behavior. `PWD` is outside the section 8.3 protocol-variable tier and outside the section 13 reserved list. + +**Cross-workflow rendezvous.** Scripts handing data across a cross-workflow `goto` use `$LOOPX_TMPDIR` (preferred — see section 7.4) or a shared fixed location like `$LOOPX_PROJECT_ROOT/.loopx/shared/…`. `LOOPX_WORKFLOW_DIR` always points at the currently-spawned script's own workflow and is not a rendezvous point. ### 6.2 Bash Scripts Bash scripts (`.sh`) are executed as child processes via `/bin/bash`. The script's stdout is captured as its structured output. Stderr is passed through to the user's terminal. +Because loopx invokes each script using its absolute discovery-time path (section 6.1), Bash `$0` is the absolute discovery-time path of the entry script. Bash does not canonicalize `$0`, so `$(dirname "$0")` equals `LOOPX_WORKFLOW_DIR` byte-for-byte regardless of whether symlinks appear anywhere in the entry path (workflow directory entry, entry script file, or intermediate component). This Bash equality is normative. + ### 6.3 JS/TS Scripts JavaScript and TypeScript scripts are executed as child processes using `tsx`, which handles `.js`, `.jsx`, `.ts`, and `.tsx` files uniformly. `tsx` is a dependency of loopx and does not need to be installed separately by the user. @@ -541,6 +580,8 @@ JavaScript and TypeScript scripts are executed as child processes using `tsx`, w When running under Bun, loopx uses Bun's native TypeScript/JSX support instead of `tsx`. +Because loopx invokes each script using its absolute discovery-time path (section 6.1), `dirname(fileURLToPath(import.meta.url))` equals `LOOPX_WORKFLOW_DIR` only when the absolute entry path is symlink-free in every component. 
Node applies `realpath` to the main module's file path by default, and Bun's behavior is not specified to match; loopx does not pass runtime-specific symlink-preservation flags. `LOOPX_WORKFLOW_DIR` is authoritative for JS/TS code that needs the discovery-time workflow path. + ### 6.4 `output()` Function (JS/TS) When imported from `loopx`, the `output()` function writes structured JSON to stdout and terminates the process. @@ -598,24 +639,25 @@ The first script invocation in a loop receives **no input**. Stdin is empty. 1. A target is required. If none was provided, this is a usage error (exit code 1) — see section 4.1. Discover workflows and scripts in the `.loopx/` directory per section 5.1. Validate for name collisions (section 5.2) and name restrictions (section 5.3) across **all** discovered workflows — these are fatal in run mode. Cache the discovery results. 2. Load environment variables (global + local via `-e`). Cache the resolved set for the duration of the loop. 3. Resolve the starting target from the target provided to `loopx run`. Parse the workflow and optional script portions. If the workflow does not exist in the cached discovery results, exit with an error. If a script was specified and does not exist in the workflow, exit with an error. If no script was specified, resolve to `index` — if the workflow has no `index` script, exit with an error. -4. If `-n 0` was specified: exit with code 0 (no iterations executed, no workflow-level version checking). +4. If `-n 0` was specified: exit with code 0 (no iterations executed, no workflow-level version checking, no `LOOPX_TMPDIR` created). 5. Check the starting workflow's version declaration (section 3.2) against the running loopx version on first entry. -6. Execute the starting target with no input (first iteration). -7. Capture stdout. Parse it as structured output per section 2.3. -8. Increment the iteration counter. -9. If `stop` is `true`: exit with code 0. -10. 
If `-n` was specified and the iteration count has been reached: exit with code 0. The output from this final iteration is still yielded/observed before termination. -11. If `goto` is present: +6. Create the run-scoped temporary directory `LOOPX_TMPDIR` per section 7.4. This step runs after the starting workflow's version check (step 5) and immediately before the first child spawn. Pre-spawn failures, `-n 0` / `maxIterations: 0` early exits, and aborts observed before this step do not create a tmpdir. +7. Execute the starting target with no input (first iteration). +8. Capture stdout. Parse it as structured output per section 2.3. +9. Increment the iteration counter. +10. If `stop` is `true`: exit with code 0. +11. If `-n` was specified and the iteration count has been reached: exit with code 0. The output from this final iteration is still yielded/observed before termination. +12. If `goto` is present: a. Validate the `goto` value against the target validation rules (section 4.1). If invalid, print an error and exit with code 1. b. Resolve the target: a bare name targets a script in the current workflow; a qualified `workflow:script` targets a specific workflow and script. c. Validate that the resolved workflow exists in the cached discovery results. If not found, print an error and exit with code 1. d. Validate that the resolved script exists in the target workflow. If not found, print an error and exit with code 1. e. If entering a workflow for the first time during this loop run, check its version declaration (section 3.2). f. Execute the resolved target with `result` piped via stdin (or empty stdin if `result` is absent). - g. Return to step 7 with the new script's output. -12. If `goto` is absent: + g. Return to step 8 with the new script's output. +13. If `goto` is absent: a. Re-run the **starting target** with no input. - b. Return to step 7. + b. Return to step 8. 
**Iteration counting:** `-n` / `maxIterations` counts **every target execution**, including goto hops — not just returns to the starting target. For example, if `ralph:index` outputs `goto: "check-ready"` and `ralph:check-ready` outputs `goto: "review-adr:start"`, that is three iterations. @@ -623,10 +665,18 @@ The first script invocation in a loop receives **no input**. Stdin is empty. ### 7.2 Error Handling -- **Non-zero exit code from a script:** The loop **stops immediately**. loopx exits with code 1. The script's stderr has already been passed through to the terminal. Any stdout produced by the script before it failed is not parsed as structured output. +- **Non-zero exit code from a script:** The loop **stops immediately**. loopx exits with code 1. The script's stderr has already been passed through to the terminal. Any stdout produced by the script before it failed is not parsed as structured output. `LOOPX_TMPDIR` cleanup runs before exit per section 7.4. - **Missing workflow / missing script / missing default entry point:** If the starting target resolves to a workflow that does not exist, a script that does not exist in that workflow, or a bare workflow invocation where `index` is missing, loopx exits with code 1 and prints an error to stderr. These checks occur during target resolution (step 3 in section 7.1) before any iterations run. -- **Invalid `goto` target:** If `goto` contains an invalid target string (section 4.1), references a workflow that does not exist in the cached discovery results, or references a script that does not exist within the target workflow, loopx prints an error message to stderr and exits with code 1. +- **Invalid `goto` target:** If `goto` contains an invalid target string (section 4.1), references a workflow that does not exist in the cached discovery results, or references a script that does not exist within the target workflow, loopx prints an error message to stderr and exits with code 1. 
`LOOPX_TMPDIR` cleanup runs before exit. - **Missing `.loopx/` directory:** When executing via `loopx run <target>`, if `.loopx/` does not exist, loopx exits with an error instructing the user to create it. +- **`LOOPX_TMPDIR` creation failure:** If any step of the tmpdir creation sequence (per section 7.4) fails, loopx does not spawn any child; the CLI exits with code 1, `run()` throws on first iteration, and `runPromise()` rejects. Best-effort cleanup runs on any partial directory per section 7.4 without masking the original creation error. +- **Child launch / spawn failure after tmpdir creation:** If a child cannot be launched after `LOOPX_TMPDIR` has been created — including a discovered script removed or renamed mid-loop per section 5.1, runtime rejection of a child environment entry from any env tier (most reliably an entry whose name or value contains an embedded NUL byte; the runtime does not distinguish tiers), `exec` failure, or any other pre-first-line-of-user-code spawn-path error — loopx exits with code 1 (CLI), throws (`run()`), or rejects (`runPromise()`). `LOOPX_TMPDIR` cleanup runs before exit. + +**Cleanup idempotence.** loopx performs at most one cleanup attempt per created `LOOPX_TMPDIR` over the lifetime of the run. Racing terminal triggers (SIGTERM during cleanup of a prior SIGINT; abort concurrent with consumer `.return()` / `.throw()`; non-zero exit concurrent with abort; abort after final yield concurrent with a consumer `.throw()`; child spawn failure concurrent with abort; abort concurrent with tmpdir creation failure; etc.) do not start a second cleanup attempt and do not re-emit cleanup warnings.
+ +**Terminal-outcome precedence.** The selection of which terminal outcome is surfaced is governed by explicit precedence rules elsewhere — the pre-first-`next()` consumer-cancellation carve-out (section 9.1), the abort-wins-over-pre-iteration-failures rule (section 9.3), the CLI pre-iteration signal-wins rule (section 7.3), and the abort-after-final-yield rule (section 9.3). Except where an explicit precedence rule applies, the first terminal trigger observed by loopx determines the surfaced outcome among genuinely racing triggers. "Observed by loopx" refers to the point at which the trigger enters loopx's own tracking (signal handler, abort listener, child-exit callback, consumer interaction) — not OS-level delivery order. Idempotence and warning cardinality are independent of which outcome surfaces. + +**Cleanup warning cardinality.** Every run emits at most one cleanup-related stderr warning. The exact warning text format is implementation-defined; cleanup warnings do not affect the CLI exit code, the generator outcome, or the promise rejection reason. Tmpdir location, mode, identity-fingerprint cleanup safety, symlink / non-directory / mismatched-directory / renamed-away handling, mount-point non-detection, and absence of stale-tmpdir reaping are specified in section 7.4. ### 7.3 Signal Handling @@ -634,8 +684,71 @@ loopx handles process signals to ensure clean shutdown: - **SIGINT / SIGTERM:** The signal is forwarded to the **active child process group** (not just the direct child). This ensures grandchild processes (e.g., agent CLIs spawned by scripts) also receive the signal, preventing orphaned processes. - **Grace period:** After forwarding the signal, loopx waits **5 seconds** for the child process group to exit. If the process group has not exited after 5 seconds, loopx sends SIGKILL to the process group. -- **Exit code:** After the child exits, loopx exits with code `128 + signal number` (standard POSIX convention, e.g., 130 for SIGINT). 
-- **Between iterations:** If no child process is running (e.g., between iterations), loopx exits immediately with the appropriate signal exit code. +- **Tmpdir cleanup before exit:** When `LOOPX_TMPDIR` has been created, cleanup runs after any active child process group has exited (per the grace period above, including SIGKILL escalation) and before loopx exits with the signal's code. When no child is active, cleanup runs immediately. +- **Exit code:** After the child exits and any tmpdir cleanup completes, loopx exits with code `128 + signal number` (standard POSIX convention, e.g., 130 for SIGINT). +- **Between iterations:** If no child process is running (e.g., between iterations), loopx exits immediately with the appropriate signal exit code (after `LOOPX_TMPDIR` cleanup). + +**Pre-iteration signal-wins precedence (CLI).** SIGINT / SIGTERM observed by loopx's installed pre-iteration signal handler wins over non-signal pre-iteration failures not yet surfaced at that point: loopx cleans up any partial tmpdir and exits with `128 + signal number` regardless of which non-signal pre-iteration step (target syntax validation, `.loopx/` discovery, env-file loading, target resolution, tmpdir creation) would otherwise have failed. The contract is phrased around handler observation, not OS-level delivery — loopx is not required to make pre-iteration async or poll for pending signals between steps. + +Signal-wins precedence begins only after the `run -h` / `--help` short-circuit and usage-level argument parsing have completed *and* loopx has installed its pre-iteration signal handlers. Signals delivered in the pre-handler-installation window (process startup, argument parsing, help short-circuit) are outside this precedence: either the runtime applies POSIX default signal disposition and terminates the process, or parsing completes and the parser-level usage error surfaces with exit code 1. Both outcomes are conforming. 
+ +### 7.4 Run-scoped Temporary Directory (`LOOPX_TMPDIR`) + +For each `loopx run` (CLI) or `run()` / `runPromise()` (programmatic) invocation that reaches execution, loopx creates a unique temporary directory before the first child process spawns and injects its absolute path into every script's environment as `LOOPX_TMPDIR` (see section 8). Scripts use this directory for intra-run scratch state. + +**Parent selection.** The tmpdir parent is `os.tmpdir()` evaluated in loopx's own process environment. The parent is captured on the same schedule as the inherited `process.env` snapshot (section 8): synchronously at the `runPromise()` call site for `runPromise()`; at the first `next()` call for `run()`; pre-iteration for the CLI. `TMPDIR` / `TEMP` / `TMP` entries in env files or `RunOptions.env` reach spawned scripts but do not mutate loopx's own `process.env` and therefore do not redirect the tmpdir parent. Because the parent is captured on the inherited-env schedule, a synchronous `process.env.TMPDIR = …` mutation after `runPromise()` returns does not affect the tmpdir parent for that run, while a mutation between `run()` returning and first `next()` **does**. Callers who need a deterministic parent for `runPromise()` set `TMPDIR` in loopx's inherited environment before the call. + +**Location, naming, and mode.** The directory is created under the selected parent via `mkdtemp` with a `loopx-` prefix, mode `0700`. The exact name format beyond the prefix is implementation-defined. + +**Creation order.** loopx performs the following steps in this exact order before any child is exposed to the path: + +1. `mkdtemp` to create the directory. +2. Capture an identity fingerprint (e.g., device/inode pair) of the created directory. +3. Verify or secure mode `0700`. + +This order is normative because it determines which cleanup-safety guarantee applies if a step fails: + +- **`mkdtemp` itself fails:** no path exists, so no cleanup is needed. 
+- **Identity capture fails (after `mkdtemp` succeeded):** loopx attempts a single non-recursive `rmdir` on the path. This succeeds on an empty directory (the expected state immediately after `mkdtemp`) and fails without side effects on a symlink, non-directory, or non-empty directory. Without a recorded identity fingerprint, this remains outside the race-resistant guarantee: a same-user process that swapped the original `mkdtemp` directory for a different empty directory before the `rmdir` would have that replacement removed. loopx accepts this narrow trade-off as the cost of cleaning up the common identity-capture-failure case. If `rmdir` fails, loopx emits a single stderr warning and leaves the path in place. +- **Mode-securing fails (after `mkdtemp` and identity capture succeeded):** loopx runs the full identity-fingerprint cleanup-safety routine described below. + +Cleanup failure during creation-failure handling emits at most one warning but does not mask the original creation error. If a SIGINT / SIGTERM or `AbortSignal` abort arrives concurrently with a creation failure, the signal / abort wins and any partial directory is cleaned up under the same rules. + +**Scope and lifecycle.** + +- **Created:** once per run, after the pre-iteration sequence (discovery, env-file loading, target resolution, version check) and immediately before the first child spawns. Pre-spawn failures, `-n 0` / `maxIterations: 0` early exits, and aborts observed before tmpdir creation do not create a tmpdir. +- **Shared across iterations.** All scripts — starting target, intra- and cross-workflow `goto` destinations, loop resets — observe the same `LOOPX_TMPDIR` value. +- **Persisted within the run.** Files written by one script remain visible to later scripts. +- **Concurrent runs are isolated.** Each run receives its own distinct directory. 
+ +**Cleanup triggers.** loopx runs cleanup on every terminal outcome of a run that reached tmpdir creation: + +- Normal completion (`stop: true` from a script, or `-n` / `maxIterations` reached). +- Non-zero script exit; invalid `goto` target; missing workflow or script during `goto` resolution. +- Child launch / spawn failure after tmpdir creation (per section 7.2). +- SIGINT / SIGTERM to loopx — after any active child process group has exited (per section 7.3, including SIGKILL escalation), before loopx exits with the signal's code. If no child is active, cleanup runs immediately. +- Programmatic `AbortSignal` abort — after any active child process group exits per section 9.1, before the generator throws or the promise rejects. +- Consumer-driven cancellation under `run()` (`break` from `for await`, `.return()` / `.throw()` after first `next()`) — after terminating any active child per section 9.1, before the generator settles. + +For `run()`, cleanup on normal completion is guaranteed only once the generator is driven to settlement (`{ done: true }`, `.return()`, `.throw()`, or a `for await` loop that runs to completion). The final yielded `Output` is not itself settlement; a caller that consumes the final output and abandons the generator may leak the tmpdir. + +Cleanup does **not** run when loopx itself is killed via SIGKILL or the host crashes; leaked tmpdirs are expected to be reaped by OS temp-cleaning policy. **loopx does not reap stale tmpdirs at startup.** + +**Identity-fingerprint cleanup safety.** Cleanup is path-based and best-effort — not a sandbox against actively racing same-user processes. At cleanup time, loopx `lstat`s the `LOOPX_TMPDIR` path and dispatches on the outcome: + +1. **Path no longer exists (ENOENT):** no-op. +2. **Path is a symlink:** unlink the symlink entry; do not follow the target. +3. **Path is a regular file, FIFO, socket, or other non-directory non-symlink:** leave in place with a stderr warning. 
Unlinking would risk mutating unrelated data (hard-link `nlink` decrement, or data renamed into the path with `nlink == 1`). +4. **Path is a directory whose identity matches the recorded identity:** recursively remove. Symlink entries encountered during the walk are unlinked but not traversed, so symlinks pointing outside the tmpdir do not collateral-delete their targets. +5. **Path is a directory whose identity does not match:** leave in place with a stderr warning. loopx does not recursively remove a directory it did not create. + +If the top-level `lstat` fails for any reason other than ENOENT, the `unlink` in case 2 fails, or the recursive removal in case 4 fails, loopx emits a single stderr warning and makes no further changes. Combined with the cleanup idempotence rule in section 7.2, every run emits at most one cleanup warning; the warning text format is implementation-defined and warnings do not affect the CLI exit code, the generator outcome, or the promise rejection reason. + +**Renamed-away tmpdir.** A script that removes or renames its tmpdir during the run defeats automatic cleanup of the moved directory; loopx does not chase renamed tmpdirs. + +**Mount points.** loopx does not detect or avoid mount points inside `$LOOPX_TMPDIR`; scripts that mount into `$LOOPX_TMPDIR` take responsibility for unmounting before cleanup. + +**Race-resistance scope.** A race-resistant guarantee against a same-user process actively mutating the path during cleanup (fd-relative `openat` / `unlinkat` with `AT_SYMLINK_NOFOLLOW`) is out of scope for v1. The path-based, identity-matched best-effort cleanup above is the v1 contract. --- @@ -649,14 +762,14 @@ Global environment variables are stored in the loopx configuration directory at: $XDG_CONFIG_HOME/loopx/env ``` -If `XDG_CONFIG_HOME` is not set, it defaults to `~/.config`, resulting in `~/.config/loopx/env`. +If `XDG_CONFIG_HOME` is not set, the fallback is `$HOME/.config`, resulting in `$HOME/.config/loopx/env`. 
`HOME` is read from the inherited environment on the same schedule as `XDG_CONFIG_HOME` (see the snapshot-timing paragraph below). The file uses `.env` format with the following rules: - One `KEY=VALUE` pair per line. - **No whitespace is permitted around `=`.** The key extends to the first `=`, and the value is everything after it to the end of the line (trimmed of trailing whitespace). - Lines starting with `#` are comments. **Inline comments are not supported** — a `#` after a value is part of the value. -- Blank lines are ignored. +- Blank lines are ignored. A line containing only whitespace (for example, spaces and/or tabs, after removing the line terminator) is considered blank and is ignored silently without a warning. - Duplicate keys: **last occurrence wins**. - Values are single-line strings. Values may be optionally wrapped in double quotes (`"`) or single quotes (`'`), which are stripped. "Wrapped" means the value begins and ends with the same quote character — if quotes are unmatched (e.g., `KEY="hello` or `KEY='world`), the value is treated literally with no quotes stripped. **No escape sequence interpretation** — content inside quotes is treated literally (e.g., `"\n"` is a backslash followed by `n`, not a newline). - No multiline value support. @@ -666,11 +779,11 @@ If the directory or file does not exist, loopx treats it as having no global var **Concurrent mutation:** Concurrent writes to the same global env file (e.g., multiple simultaneous `loopx env set` calls) are not guaranteed to be atomic in v1. The result is undefined. -**Environment variables are loaded once at loop start and cached for the duration of the loop.** Changes to env files during loop execution are not picked up until the next invocation. +**Environment variables are loaded once at loop start and cached for the duration of the loop.** Changes to env files during loop execution are not picked up until the next invocation. 
The same once-per-run rule extends to the inherited `process.env`: loopx snapshots the caller's `process.env` once per run and reuses the snapshot for every child spawn in the run; mutations to `process.env` between spawns do not propagate. **Snapshot timing.** For the CLI, the inherited `process.env` snapshot is taken during the pre-iteration sequence at loop start (section 7.1, before the first child spawn). For the programmatic API, the snapshot is lazy under `run()` (captured at first `next()`) and eager under `runPromise()` (captured synchronously at the call site) — see sections 9.1 and 9.2 for the precise contracts. The tmpdir parent (the `TMPDIR` / `TEMP` / `TMP` values that `os.tmpdir()` reads in loopx's own process) is captured on the same schedule as the inherited-env snapshot — see section 7.4. Global env file path resolution (`$XDG_CONFIG_HOME/loopx/env`, with the documented `HOME`-based fallback) reads `XDG_CONFIG_HOME` / `HOME` from the inherited environment on the same schedule. `RunOptions.env` does **not** affect global env-file path resolution — `env` merges into the child environment after env-file loading (section 8.3), so an `XDG_CONFIG_HOME` or `HOME` entry supplied via `RunOptions.env` reaches spawned scripts but does not redirect where loopx looks for the global env file. ### 8.2 Local Override (`-e`) -When `-e <path>` is specified during execution (`loopx run <target>` or the programmatic API), the file at `<path>` is read using the same `.env` format rules. If the file does not exist, loopx exits with an error. +When a local env file is specified during execution — via `-e <path>` on the CLI or `RunOptions.envFile` in the programmatic API — the file is read using the same `.env` format rules. If the file does not exist, loopx exits with an error. **Note:** Under the `loopx run -h` short-circuit, `-e` is not parsed or validated — a missing env file is not an error in that context (see section 4.2).
@@ -680,18 +793,25 @@ Local variables are merged with global env vars. Local values take precedence on All resolved environment variables are injected into the script's execution environment alongside the inherited system environment, with the following precedence (highest wins): -1. **loopx-injected variables** (`LOOPX_BIN`, `LOOPX_PROJECT_ROOT`, `LOOPX_WORKFLOW`) — always override any user-supplied values of the same name. -2. **Local env file** (`-e`) values. -3. **Global loopx env** (`$XDG_CONFIG_HOME/loopx/env`) values. -4. **Inherited system environment.** +1. **loopx-injected protocol variables** (`LOOPX_BIN`, `LOOPX_PROJECT_ROOT`, `LOOPX_WORKFLOW`, `LOOPX_WORKFLOW_DIR`, `LOOPX_TMPDIR`) — always override any user-supplied values of the same name. +2. **`RunOptions.env`** (programmatic API only — see section 9.1 / 9.5). +3. **Local env file** (`-e` / `RunOptions.envFile`) values. +4. **Global loopx env** (`$XDG_CONFIG_HOME/loopx/env`) values. +5. **Inherited system environment** (snapshotted once per run — see section 8.1). + +A `RunOptions.env` entry overrides same-named values from `-e`, the global env file, and inherited `process.env`, and is overridden by protocol variables. The CLI has no `RunOptions.env` surface; CLI callers pass per-run values via the shell env prefix (`key=value loopx run …`), which flows through inherited `process.env` at tier 5. 
-loopx injects the following variables into every script execution: +loopx injects the following protocol variables into every script execution: | Variable | Value | |----------|-------| | `LOOPX_BIN` | Resolved realpath of the effective loopx binary (post-delegation) | | `LOOPX_PROJECT_ROOT` | Absolute path to the project root (section 3.2) | -| `LOOPX_WORKFLOW` | The name of the workflow containing the currently executing script | +| `LOOPX_WORKFLOW` | The name of the workflow containing the currently-spawned script | +| `LOOPX_WORKFLOW_DIR` | Absolute path of the workflow directory containing the currently-spawned script (section 6.1). Refreshed per-spawn alongside `LOOPX_WORKFLOW`. | +| `LOOPX_TMPDIR` | Absolute path to the run-scoped temporary directory (section 7.4). Stable for the lifetime of the run. | + +**`PWD` is not a protocol variable.** loopx neither sets nor unsets `PWD`; the value loopx's own process holds is inherited unchanged. `PWD` is outside this protocol-variable tier and outside the section 13 reserved list. Callers wanting a specific `PWD` in the child supply it via `RunOptions.env` or inherited env, subject to Bash's standard POSIX inheritance behavior. **Note:** For Node.js/tsx, module resolution for `import from "loopx"` is handled via `--import` and a custom resolve hook (see section 3.3), not via `NODE_PATH`. For Bun, `NODE_PATH` is set internally but is not considered a user-facing injected variable. @@ -726,7 +846,11 @@ run("ralph:check-ready") // runs ralph:check-ready `target` is a required parameter. In TypeScript, omitting `target` is a static type error. In JavaScript, or when the type check is bypassed, runtime-invalid `target` values (e.g., `undefined`, `null`, `42`, or any non-string) are rejected lazily: `run()` still returns a generator without throwing, and the error is raised on first iteration (first `next()` call). For example, `run(undefined as any)` returns a generator that throws on first iteration. 
-**Error timing:** `run()` snapshots its options and `cwd` at call time, but all errors (validation failures, missing workflows, missing scripts, discovery errors, invalid `target`) are surfaced lazily when iteration begins (i.e., on the first `next()` call or equivalent). The `run()` call itself always returns a generator without throwing. +**Option-snapshot timing.** `run()` reads its `options` argument at the call site as a synchronous snapshot, but never throws at the call site. Each option field is read at most once per call, and a throwing getter or proxy trap is not re-invoked to retry. `options.signal` is read **before** other `RunOptions` fields, so an already-aborted signal is captured before any other option-field read can produce a snapshot exception. The order of the remaining option fields (`options.env`, `options.cwd`, `options.envFile`, `options.maxIterations`) is implementation-defined. Any exception raised during the snapshot — a throwing option-field getter, a `Proxy` `ownKeys` trap that throws while enumerating `options.env`, a throwing enumerable getter inside `options.env`, a throwing `Proxy` `get` trap on an included `options.env` key, or a throwing `options.env` getter on the options object itself — is captured and surfaced via the standard pre-iteration error path on the first `next()`, not at the call site. + +**Inherited-env and tmpdir-parent snapshot timing.** Under `run()`, the inherited `process.env` snapshot is **lazy** — captured on the first `next()` call alongside the rest of the pre-iteration sequence. Mutations to `process.env` between `run()` returning and first `next()` **are** observed; later mutations between iterations are not. The tmpdir parent (the `TMPDIR` / `TEMP` / `TMP` values that `os.tmpdir()` reads) is captured on the same schedule, so a mutation to `process.env.TMPDIR` between `run()` returning and first `next()` does affect the tmpdir parent for that run. 
Global env file path resolution (`XDG_CONFIG_HOME` / `HOME`) also uses this schedule. `LOOPX_TMPDIR` itself is created during the same pre-iteration sequence, between the starting workflow's version check and the first child spawn (section 7.1, step 6); cleanup runs before any error is surfaced (section 7.4 / 9.3). + +**Error timing.** All errors (validation failures, missing workflows, missing scripts, discovery errors, invalid `target`, invalid `options` fields including `options.env`, env-file loading failures, tmpdir-creation failures) are surfaced lazily when iteration begins (i.e., on the first `next()` call or equivalent). The `run()` call itself always returns a generator without throwing. Options can be passed as a second argument: @@ -738,11 +862,13 @@ for await (const output of run("ralph", { maxIterations: 10, envFile: ".env" })) } ``` -**Early termination:** There are two cancellation mechanisms with different semantics: +**Early termination.** There are two cancellation mechanisms with different semantics: + +- **Consumer-driven (`break`, `generator.return()`, `generator.throw()` after first `next()`):** loopx terminates the active child process group (if one is running — SIGTERM, then SIGKILL after 5 seconds) and ensures no further iterations start. If no child process is active at the time of cancellation (e.g., `break` after a yield, between iterations), the generator simply completes with no further yields. This is a silent, clean completion. `LOOPX_TMPDIR` cleanup runs before generator settlement. -- **Consumer-driven (`break`, `generator.return()`):** loopx terminates the active child process group (if one is running — SIGTERM, then SIGKILL after 5 seconds) and ensures no further iterations start. If no child process is active at the time of cancellation (e.g., `break` after a yield, between iterations), the generator simply completes with no further yields. This is a silent, clean completion. 
+- **AbortSignal:** When the `signal` is aborted, loopx terminates the active child process group (if one is running — SIGTERM, then SIGKILL after 5 seconds) and the generator **throws an abort error**. This applies regardless of whether a child process is active — aborting the signal generally produces an error even if it occurs between iterations or before the first `next()` call. `LOOPX_TMPDIR` cleanup, when applicable, runs before the abort error is thrown. -- **AbortSignal:** When the `signal` is aborted, loopx terminates the active child process group (if one is running — SIGTERM, then SIGKILL after 5 seconds) and the generator **throws an abort error**. This applies regardless of whether a child process is active — aborting the signal always produces an error, even if it occurs between iterations or before the first `next()` call. This follows conventional JavaScript `AbortSignal` semantics. +**Pre-first-`next()` consumer-cancellation carve-out.** A `.return(value)` or `.throw(err)` call as the consumer's *first* interaction with the generator returned by `run()` settles the generator per standard JS async-generator semantics; the loop body is never entered. No pre-iteration step runs and no pre-iteration error is surfaced: captured option-snapshot errors (invalid `options` values, invalid `options.signal`, non-string `options.cwd` / `options.envFile`, invalid `options.maxIterations`, invalid `options.env` shape, throwing option-field getters, throwing `options.env` enumerable getters or proxy traps), invalid `target` values and target-syntax validation, `.loopx/` discovery failures, env-file loading failures, target resolution failures, and tmpdir creation failures are all suppressed. A `signal` captured at call time is not consulted; even an already-aborted signal does not surface the abort error through this path. This carve-out applies only to `run()`; `runPromise()` has no equivalent. 
Once the consumer's first interaction is `next()`, the body is entered and captured state is observed. ### 9.2 `runPromise(target: string, options?: RunOptions)` @@ -756,14 +882,29 @@ Returns a `Promise` that resolves with an array of all `Output` values `target` is required, same as `run()`. In JavaScript or when the type check is bypassed, `runPromise(undefined as any)` returns a rejected promise rather than throwing synchronously — the call itself always returns a promise, and the validation error surfaces as a rejection. +**Option-snapshot timing.** Identical to `run()` (section 9.1): each option field read at most once, `options.signal` first, throwing getters / traps captured rather than escaping at the call site. + +**Inherited-env and tmpdir-parent snapshot timing.** Under `runPromise()`, the inherited `process.env` snapshot is **eager** — captured synchronously at the `runPromise()` call site, before `runPromise()` returns. Mutations to `process.env` after `runPromise()` returns are not observed. The tmpdir parent is captured on the same schedule, so a synchronous `process.env.TMPDIR = …` mutation after `runPromise()` returns does not affect the tmpdir parent for that run. Global env file path resolution (`XDG_CONFIG_HOME` / `HOME`) also uses this schedule. `LOOPX_TMPDIR` itself is created asynchronously after return, during the same pre-iteration sequence used by the CLI and `run()`. + +There is no `runPromise()` equivalent of the `run()` pre-first-`next()` consumer-cancellation carve-out: any pre-iteration error surfaces via promise rejection. + ### 9.3 Error Behavior The programmatic API has different behavior from the CLI: - **The library never prints `result` to stdout.** All results are returned as structured `Output` objects. 
-- **Errors throw/reject.** Any condition that would cause the CLI to exit with code 1 (non-zero script exit, invalid `goto`, missing workflow, missing script, validation failures) causes `run()` to throw from the generator and `runPromise()` to reject. +- **Errors throw/reject.** Any condition that would cause the CLI to exit with code 1 (non-zero script exit, invalid `goto`, missing workflow, missing script, validation failures, `LOOPX_TMPDIR` creation failure, child launch / spawn failure after tmpdir creation including runtime env-entry rejections from any tier, `RunOptions.env` shape and snapshot-time errors, invalid `options` and invalid `options.signal`, non-string `options.cwd` / `options.envFile`) causes `run()` to throw from the generator and `runPromise()` to reject. - **Partial outputs are preserved.** When `run()` throws, all previously yielded outputs have already been consumed by the caller. When `runPromise()` rejects, partial outputs are not available (use `run()` if partial results matter). - **Stderr passes through.** Script stderr is still forwarded to the parent process's stderr, same as in CLI mode. +- **Cleanup ordering is observable.** When `LOOPX_TMPDIR` cleanup runs as part of an error path (per section 7.4), it runs **before** the generator throws or the promise rejects. Cleanup warnings emitted during this window do not mask the original throw / rejection reason. + +**Abort precedence over pre-iteration failures.** Once a usable `AbortSignal` has been captured (a real `AbortSignal` or a duck-typed signal that satisfies the section 9.5 contract), an already-aborted signal at call time, or a signal that aborts during pre-iteration before the first child spawn, displaces all other pre-iteration failure modes on the same call: captured option-snapshot errors, target argument / target syntax validation, `.loopx/` discovery, env-file loading, target resolution, and tmpdir creation. 
An invalid `options` value or non-`AbortSignal`-compatible `options.signal` captures no signal and does not enter this pathway — it remains an ordinary option-snapshot error. Version checking is not a failure mode per section 3.2, so it does not compete. + +**Abort precedence × `maxIterations: 0`.** Abort precedence also applies before the `maxIterations: 0` zero-iteration short-circuit. If a usable signal has already aborted at call time, or aborts before the first child spawn, `maxIterations: 0` does not cause normal zero-iteration completion. Under `runPromise()`, the promise rejects with the abort error rather than resolving with `[]`. Under `run()`, the first `next()` throws the abort error rather than resolving `{ done: true }`. No child is spawned and no `LOOPX_TMPDIR` is created on this path. This rule is still subject to the `run()` pre-first-`next()` consumer-cancellation carve-out: a `.return()` or `.throw()` as the consumer's first interaction suppresses the abort as described in section 9.1. + +**Pinned pre-iteration priority.** Among non-abort pre-iteration errors, project-root-blocking and envFile-path-blocking option errors must surface before the four project-root-dependent failures: a non-string `options.cwd` or throwing `options.cwd` getter, and a non-string `options.envFile` or throwing `options.envFile` getter, surface before `.loopx/` discovery errors, env-file loading errors, target resolution errors, or tmpdir-creation errors. (Otherwise, an unrelated non-options error detected under a garbage project root would obscure the actual cause.) The pinning is scoped to that set only; relative priority between these pinned option errors and target argument / target syntax validation is implementation-defined. 
Relative priority among the remaining non-abort pre-iteration errors (invalid `options`, invalid `options.signal`, invalid `options.maxIterations`, invalid `options.env` shape and snapshot-time throws, target argument / target syntax validation, and the four project-root-dependent failures) is implementation-defined. + +**Abort after final yield.** An `AbortSignal` that aborts after the final `Output` has been yielded (via `stop: true` or `maxIterations` reached) but before the generator settles via `{ done: true }` produces the abort error on the next generator interaction — `g.next()`, `.return()`, or `.throw()`. Normal completion is fixed only at settlement, not at the final yield. `LOOPX_TMPDIR` cleanup runs before the abort error is surfaced. ### 9.4 `output(value)` and `input()` @@ -785,13 +926,49 @@ interface RunOptions { envFile?: string; signal?: AbortSignal; cwd?: string; + env?: Record<string, string>; } ``` -- When `signal` is provided and aborted, the active child process group is terminated and the generator/promise completes with an abort error. -- `cwd` specifies the **project root**: the directory from which `.loopx/` is resolved and from which `LOOPX_PROJECT_ROOT` is derived. It does not control the child process working directory — scripts always execute with their workflow directory as cwd (section 6.1). Defaults to `process.cwd()` at the time `run()` or `runPromise()` is called. Code that passes `cwd` expecting it to set the script's working directory must be updated — relative path resolution within the script will be relative to the workflow directory, and `LOOPX_PROJECT_ROOT` (sourced from `RunOptions.cwd` or `process.cwd()`) should be used for project-root-relative paths. -- `maxIterations` counts every target execution, including goto hops. `maxIterations: 0` mirrors CLI `-n 0` behavior: validates and exits without executing any iterations.
`maxIterations` must be a non-negative integer; invalid values (negative, non-integer, NaN) cause `run()` to throw on first iteration and `runPromise()` to reject. -- Relative `envFile` paths are resolved against `cwd` if provided, otherwise against `process.cwd()` at call time. +**`options` validity.** `options` must be omitted, `undefined`, or a non-null, non-array, non-function object. Non-conforming values, including throwing getters on the options object, are captured at call time and surfaced via the standard pre-iteration error path rather than escaping at the call site. + +**`signal`.** + +- When present and not `undefined`, `signal` must be an `AbortSignal`-compatible object. Compatibility requires: + - Reading `signal.aborted` must not throw, and the value read must be a boolean. No coercion is applied: missing / `undefined` values, strings such as `"false"`, numbers such as `0`, and any other non-boolean values mean the object is not `AbortSignal`-compatible. + - The object must expose an `addEventListener('abort', listener)` method that is callable and returns without throwing when loopx invokes it to register its abort listener. + + A non-compatible `signal` captures no usable signal and is surfaced as an ordinary option-snapshot error under the standard pre-iteration error path. +- `removeEventListener` is not part of the `AbortSignal`-compatibility contract: loopx does not require its presence, and whether loopx invokes it on settlement is implementation-defined. +- When `signal` is provided and aborted, the active child process group is terminated and the generator / promise completes with an abort error (subject to the abort precedence rules in section 9.3 and the `run()` pre-first-`next()` carve-out in section 9.1).
+- **Duck-typed signal reentrancy.** For non-`AbortSignal` duck-typed signals: if `addEventListener` synchronously invokes the registered listener during registration, or if `aborted` is observed as `true` at any point during call-time capture, loopx treats the signal as aborted. Real `AbortSignal` instances passed in an already-aborted state must be observed as aborted regardless of implementation latitude. Other reentrant behavior — `aborted` transitioning between reads within the same call, ordering between reading `aborted` and registering the listener for duck-typed signals — is implementation-defined. + +**`cwd`.** + +- When present and not `undefined`, must be a string. A non-string value, or a throwing `cwd` getter, is captured at call time and surfaced via the pre-iteration error path. +- Specifies both the **project root** and the **script execution cwd**: it is the directory from which `.loopx/` is resolved, from which `LOOPX_PROJECT_ROOT` is derived, and which every spawned script uses as its working directory (section 6.1). The previous "`cwd` does not control script execution cwd" disclaimer no longer applies. +- A relative `cwd` is resolved via `path.resolve(process.cwd(), options.cwd)` once at call time; an absolute `cwd` is used unchanged. No `realpath` / canonicalization is applied (see section 3.2). +- Defaults to `process.cwd()` at the time `run()` or `runPromise()` is called. + +**`envFile`.** + +- When present and not `undefined`, must be a string. A non-string value, or a throwing `envFile` getter, is captured at call time and surfaced via the pre-iteration error path. +- Relative `envFile` paths are resolved against the resolved project root (which equals the child's cwd) when `cwd` is supplied, otherwise against `process.cwd()` at call time. + +**`maxIterations`.** + +- Must be a non-negative integer. Invalid values (negative, non-integer, NaN) cause `run()` to throw on first iteration and `runPromise()` to reject.
+- Counts every target execution, including goto hops. `maxIterations: 0` mirrors CLI `-n 0` behavior: validates and exits without executing any iterations. No `LOOPX_TMPDIR` is created under `maxIterations: 0`. `maxIterations: 0` remains subject to the abort-precedence rules in section 9.3. + +**`env`.** + +- **Shape.** Must be omitted, `undefined`, or a non-null, non-array, non-function object whose own enumerable string-keyed entries all have string values. Symbol-keyed, non-enumerable, and inherited properties are ignored. Invalid shapes (null, array, non-object, function, or an entry with a non-string value) are captured at call time and surfaced via the standard pre-iteration error path. +- **Lifetime.** Entries are captured synchronously at call time as a shallow copy — loopx reads the supplied object's own enumerable string-keyed properties once. After loopx has identified the included own enumerable string-keyed keys, it reads each included key's value using ordinary ECMAScript property `[[Get]]` semantics, equivalent to `env[key]`, exactly once during the call-time snapshot. Therefore, for a `Proxy` used as `options.env`, a `get` trap for each included string key is invoked exactly once; if that trap throws, the exception is captured and surfaced via the standard pre-iteration error path. This rule does not require loopx to read symbol-keyed, non-enumerable, or inherited properties; those remain ignored. The strategy used to determine enumerability may invoke descriptor-related proxy traps and remains otherwise implementation-defined unless specified elsewhere. Mutating the original object after `run()` / `runPromise()` returns has no effect on the running loop. 
Any exception raised during the snapshot (a `Proxy` `ownKeys` trap that throws, a throwing enumerable getter, a throwing `Proxy` `get` trap on an included string key, a throwing `options.env` getter on the options object) is captured and surfaced through the standard pre-iteration error path; it never escapes at the call site. +- **Merge position.** Entries merge into the child environment after global and local env-file loading and before loopx-injected protocol variables (see section 8.3 precedence list). A `RunOptions.env` entry overrides same-named values from `-e`, the global env file, and inherited `process.env`, and is overridden by protocol variables. +- **Applies to every script in the run.** Starting target, intra- and cross-workflow `goto` destinations, and loop resets all receive the same `env` additions. +- **No name validation beyond string-to-string.** loopx does not enforce the POSIX `[A-Za-z_][A-Za-z0-9_]*` name pattern, does not reject `LOOPX_*` keys (they are silently overridden by protocol injection), does not reject `=` or empty-string names, and does not reject NUL bytes. Runtime-level rejections — most reliably a name or value containing an embedded NUL byte — surface as child launch / spawn failures at spawn time (any env tier may be the source; the runtime does not distinguish tiers). `RunOptions.env` shape validation (invalid object types, non-string values, snapshot-time throws) is independent of spawning and surfaces under `maxIterations: 0` just as under any other iteration count. The CLI has no `RunOptions.env` to validate, so this shape-validation path does not apply to `-n 0`; `-n 0` does still validate env files. +- **No CLI surface.** Programmatic-only. CLI callers pass per-run values via the shell env prefix (`key=value loopx run …`), which flows through inherited `process.env`, or via `-e` / `loopx env set` for file-based config. 
+- **`LOOPX_DELEGATED` is not script-protocol-protected.** Per section 3.2, `LOOPX_DELEGATED` is set only on the inner loopx process during project-root delegation, not on every script spawn. It is not added to the per-spawn protected tier; env files, inherited `process.env`, and `RunOptions.env` may supply it to spawned scripts. --- @@ -803,7 +980,7 @@ loopx install [options] <source> Installs one or more workflows into the `.loopx/` directory, creating it if necessary. -`<source>` is required. `loopx install` with no source is a usage error (exit code 1). See section 4.2 for install-scoped options (`-w`, `-y`, `-h`) and their parsing rules. +`<source>` is required. `loopx install` with no source is a usage error (exit code 1). See section 4.2 for install-scoped options (`-w`, `-y`, `--no-install`, `-h`) and their parsing rules. ### 10.1 Source Detection @@ -896,6 +1073,8 @@ There is no `.loopx/package.json` manifest. Version authority lives in two place ### 10.7 Install Atomicity +This section's atomicity scope covers only the **file-level install** (source acquisition → preflight → stage → commit). Post-commit auto-install behaviors specified in section 10.10 (`.gitignore` safeguard synthesis, `npm install`) run after the commit phase and are **outside** this section's preflight-atomicity guarantee: they do not roll back committed workflow files. The "preflight-atomic" invariant and the stage-then-commit mechanics themselves are unchanged; only the scope boundary is made explicit. See section 10.10 for post-commit failure semantics. + Multi-workflow installs are **preflight-atomic**: no workflows are written until all selected workflows pass preflight and staging completes. Once commit begins, a rare failure may leave a partial install; loopx reports which workflows were and were not committed.
**Preflight phase:** All preflight checks — name restriction violations, script-name collisions within a workflow, collisions with existing entries at `.loopx/`, and version mismatches (workflow declares a `loopx` range not satisfied by the running version) — are evaluated for every selected workflow (that is, every workflow that would be installed) before any are written. If any workflow fails any preflight check, the entire install fails, no workflows are written to `.loopx/`, and a single aggregated error is displayed listing all failures across all workflows. When `-y` is present, replaceable workflow-path collisions and version mismatches are recorded during preflight but are not treated as failures; all other validation failures (invalid names, same-base-name collisions, zero-workflow sources, non-workflow destination paths) remain fatal regardless of `-y`. Directories with no script files are silently skipped (they are not workflows) and do not cause a failure. @@ -922,8 +1101,54 @@ When `-w` is used, only the selected workflow is validated. Invalid sibling work ### 10.9 Common Rules -- **loopx does not run `npm install` or `bun install` after cloning/extracting.** For workflows with dependencies, the user must install them manually (e.g., `cd .loopx/my-workflow && npm install`). -- **Install failure cleanup:** Any install failure (download error, HTTP non-2xx, git clone failure, extraction failure, post-download validation failure) exits with code 1. Any partially created staging directory is removed before exit. For single-workflow installs, any partially created target directory at the destination path is also removed before exit. +- **Install failure cleanup (pre-commit failure modes):** Any pre-commit install failure (download error, HTTP non-2xx, git clone failure, extraction failure, post-download validation failure) or staging-phase failure under section 10.7 exits with code 1. Any partially created staging directory is removed before exit. 
Under the section 10.7 stage-then-commit model, pre-commit failures leave `.loopx/` itself unchanged, so no destination-path cleanup is needed for either single- or multi-workflow installs. This cleanup contract applies **only** to the pre-commit failure modes enumerated here. Post-commit auto-install failures specified in section 10.10 (`.gitignore` safeguard failure, `npm install` non-zero exit, `npm install` spawn failure) still cause exit `1` but do **not** remove already-committed workflow directories from `.loopx/` and do **not** remove a partial `node_modules/` left behind by an interrupted install; those behaviors are governed by section 10.10's aggregate-report / no-rollback rule. +- **HTTP redirect handling.** For tarball downloads, loopx does not follow HTTP redirects in v1. Any HTTP response status outside the 2xx range is treated as an HTTP non-2xx install failure, including 3xx responses. `Location:` headers on 3xx responses are ignored. + +### 10.10 Auto-install Workflow Dependencies + +After the commit phase (section 10.7) completes successfully, unless `--no-install` is present, loopx performs a post-commit auto-install pass over the committed workflows. For each committed workflow with a top-level `package.json`, loopx attempts to populate `node_modules/` by running `npm install`. The trigger is the presence of a top-level `package.json`; whether that file declares any dependencies is not inspected. The pass is best-effort: whether `npm install` actually runs and succeeds for a given workflow is governed by the per-workflow trigger conditions, opt-outs, skips, and failure modes detailed below. + +If the commit phase fails after committing only a subset of workflows, the post-commit auto-install pass is skipped entirely. loopx does not run `npm install` and does not synthesize `.gitignore` safeguards for any workflow, including workflows that were committed before the commit failure. 
The command reports the commit-phase failure according to section 10.7 and exits with code 1. + +- **Trigger.** Runs once per committed workflow whose top-level `package.json` exists at `.loopx/<workflow>/package.json`, sequentially in an implementation-defined order, with cwd set to the workflow directory. The cross-workflow order of the post-commit auto-install pass is not an external conformance contract; implementations may use their internal commit order or any other implementation-defined order. The required external guarantees are that each committed workflow with a qualifying top-level `package.json` is considered once, npm children do not overlap, and processing follows the failure / signal rules in this section. Workflows without a top-level `package.json` are skipped silently — no warning, no `npm install` invocation, no `.gitignore` synthesis. When `-w <name>` restricts the install to a single workflow, auto-install considers only that workflow. +- **Package manager.** `npm install` unconditionally. loopx does not inspect `bun.lockb`, `pnpm-lock.yaml`, `yarn.lock`, or a `packageManager` field to select a different manager, and does not pass `--production` / `--omit=dev` / `--ignore-scripts` / any other flags — whatever the workflow's `package.json` and the user's npm configuration prescribe is what runs. Workflow authors needing a different manager pass `--no-install` and invoke their preferred tool manually. +- **Environment.** The `npm install` child inherits loopx's own `process.env` unchanged. `LOOPX_*` protocol variables, `RunOptions.env`, and env-file entries are not injected — these are script-execution-context concerns, not install-time concerns. +- **Output.** npm's stdout and stderr stream through to loopx's stdout and stderr unchanged; loopx neither buffers nor parses npm output and does not introduce a progress indicator of its own.
+- **`--no-install` opt-out.** A new install-scoped boolean flag (section 4.2) suppresses auto-install for every selected workflow in the current command. `--no-install` does not suppress any other install-time behavior (source download, preflight validation, stage-then-commit); only the `npm install` step and the `.gitignore` safeguard described below are skipped. +- **`.gitignore` safeguard for `node_modules/`.** Immediately before spawning `npm install` in a workflow directory, loopx checks the top-level `.gitignore` path using `lstat`. + - **Path does not exist (`ENOENT`):** loopx creates a regular file at `.gitignore` containing the single line `node_modules`. + - **Path exists and is a regular file:** loopx leaves it unchanged and proceeds. loopx does not read, parse, validate, append to, or inspect the file's contents or permissions; the workflow author is responsible for any pre-existing regular `.gitignore`. + - **Path exists but is not a regular file** (directory, symlink, FIFO, socket, or other non-regular entry): loopx treats this as a `.gitignore` safeguard failure. The entry is left unchanged, `npm install` is skipped for that workflow, the failure is recorded in the auto-install aggregate report, loopx proceeds to the next workflow, and the failure contributes to final exit code `1`. + - **`lstat` failure other than `ENOENT`, or a write failure when synthesizing `.gitignore`:** loopx treats this as a `.gitignore` safeguard failure under the same aggregate-report / skip-`npm install` semantics. + + A symlink named `.gitignore` does not satisfy the safeguard, even if it resolves to a regular file. Broken and cyclic symlinks are treated as symlink entries and therefore fail the safeguard. The safeguard is skipped entirely under `--no-install` and for workflows that do not have a top-level `package.json` (matching the trigger conditions for `npm install`). + + Committed workflow files are not rolled back on a `.gitignore` safeguard failure. 
The user can remediate by replacing the `.gitignore` entry with a regular file (or removing it so loopx synthesizes one) and running `cd .loopx/<workflow> && npm install`, or by re-running `loopx install -y <source>`. +- **Failure (npm install).** If `npm install` exits non-zero, or fails to spawn (most commonly: `npm` is not on `PATH`), loopx records the failure, proceeds with the next workflow's `npm install`, and exits `1` at the end if any workflow's `npm install` failed. Dependency-install failures do not roll back the file-level install: committed workflows remain on disk in the same partial-install sense as section 10.7's commit-failure clause. Retries: `loopx install -y <source>` reinstalls from scratch, or `cd .loopx/<workflow> && npm install` retries manually. +- **Aggregate failure report.** After the post-commit auto-install pass finishes, if any `.gitignore` synthesis or `npm install` failed for any workflow, loopx emits a final aggregate failure report to stderr that lists each affected workflow together with the underlying failure (`.gitignore` safeguard failure, `npm install` non-zero exit, or `npm install` spawn failure) and then exits `1`. Workflows whose safeguard and `npm install` both succeeded are not listed. If no auto-install failures occurred, no aggregate report is emitted. The aggregate report does not roll back committed workflow files or any partial `node_modules/` left on disk; remediation is the manual procedure described in the preceding bullet and in the `.gitignore` safeguard bullet. +- **Malformed `package.json`.** When the workflow's `package.json` is unreadable, contains invalid JSON, or has an invalid `loopx` semver range (the section 3.2 failure modes), the existing section 3.2 warning is emitted and auto-install **skips that workflow silently** — loopx does not invoke `npm install` against a file that failed version validation and does not add a second warning for the same underlying failure. The `.gitignore` safeguard is also skipped for the same workflow.
A skipped workflow is not an auto-install failure and does not appear in the aggregate failure report. +- **Interaction with `-y`.** File-level replacement removes the existing workflow directory (including any previously installed `node_modules/` and any previously synthesized `.gitignore`) before the replacement is committed; the safeguard and `npm install` then run fresh against the replacement's `package.json` and `.gitignore` (or absence thereof). +- **Signals during `npm install`.** SIGINT / SIGTERM received while an `npm install` child is active propagates to the child's process group. loopx waits for the child to exit (the section 7.3 grace period and SIGKILL escalation rules apply by analogy) and then exits with the signal's code. Remaining committed workflows are not processed (no further `.gitignore` synthesis or `npm install` invocations). Partial `node_modules/` state produced before interruption is not cleaned up by loopx. +- **Signals during the auto-install pass when no npm child is active.** SIGINT / SIGTERM observed by loopx during the post-commit auto-install pass aborts the auto-install pass. If no `npm install` child is active when the signal is observed — including between workflows, after an npm child exits and before the next workflow begins, during or after `.gitignore` safeguard processing, or after a safeguard failure before the next workflow — loopx exits with `128 + signal number`, starts no further `.gitignore` safeguards, starts no further `npm install` children, and does not roll back committed workflow files. Any `.gitignore` or `node_modules/` side effects completed before the signal was observed remain on disk. If a synchronous or already-started safeguard operation completes before loopx observes the signal, its side effects remain. Signal termination suppresses the final auto-install aggregate failure report unless it had already been emitted. 
+- **Trust profile.** Because `npm install` may execute arbitrary code via `preinstall` / `install` / `postinstall` scripts declared in the workflow's dependency graph, `loopx install` inherits the same trust profile as a manual `npm install` in the same directory. Users installing from untrusted sources should pass `--no-install` and inspect the workflow before running `npm install` themselves. + +### 10.11 Install Source Symlinks + +Install source symlinks are handled explicitly during classification, validation, staging, and copy. This rule applies after a git source has been cloned or a tarball source has been extracted and any wrapper directory has been stripped. + +For install-time workflow and script discovery, loopx follows symlinks using the same entry-type policy as runtime discovery: a symlinked workflow directory can qualify as a workflow, and a symlinked script file can qualify as a script. Names are derived from the symlink entry's own name, not from the target's basename. + +During the staging / copy phase, loopx does **not** preserve symlink entries as symlinks in installed workflows. Any symlink that is part of a selected workflow is resolved and materialized into the destination as a regular copied file or directory: + +- A selected top-level workflow entry that is a symlink to a directory is installed as a real directory at `.loopx//`, containing a copy of the symlink target's workflow contents. +- A selected script entry that is a symlink to a file is installed as a real file at the corresponding destination script path, containing a copy of the symlink target's file contents. +- Symlinked non-script files or directories inside a selected workflow's copied content are likewise materialized as real files or directories. + +Symlink targets must resolve to existing paths within the install source root. 
loopx must reject the install with a preflight / validation error when a symlink that is part of a selected workflow is broken, forms a cycle, or resolves outside the source root. This prevents installed workflows from depending on the temporary clone / extraction directory or from copying arbitrary files outside the source. + +When `-w <name>` / `--workflow <name>` is used, only symlinks that are part of the selected workflow are validated and materialized. Symlinks in unselected workflow directories or source-root support directories are ignored, consistent with the selective validation rule in section 10.8. + +Installed workflow symlinks are therefore not preserved as symlinks, and no installed symlink points back into loopx's temporary acquisition or staging directories. --- @@ -944,7 +1169,7 @@ Top-level help does **not** inspect `.loopx/` or perform discovery. `loopx run -h` / `loopx run --help` prints run-specific usage information: -- `run` syntax and options (`-n`, `-e`) +- `run` syntax and options (`-n`, `-e`). Per section 4.1, `run` does not accept `--` as an end-of-options marker and does not accept a named-argument tail; the shell env prefix (`key=value loopx run <target>`) is the sole CLI surface for per-run parameterization. The printed usage reflects these limits. - A dynamically generated list of workflows and their scripts discovered in the current `.loopx/` directory. If a workflow has an `index` script, it is indicated as the default entry point. Run help performs **non-fatal discovery and validation**: @@ -960,10 +1185,10 @@ Run help is the only help form that performs workflow and script discovery. The `loopx install -h` / `loopx install --help` prints install-specific usage information: -- `install` syntax and options (`-w`, `-y`) +- `install` syntax and options (`-w`, `-y`, `--no-install`) - Supported source types -Install help does not require a source argument, does not make network requests, and does not inspect `.loopx/`.
+Install help does not require a source argument, does not make network requests, and does not inspect `.loopx/`. The `install -h` / `--help` short-circuit continues to ignore `--no-install` (and every other install-level argument) unconditionally. --- @@ -972,12 +1197,12 @@ Install help does not require a source argument, does not make network requests, | Code | Meaning | |------|---------| | 0 | Clean exit: loop ended via `stop: true`, `-n` limit reached (including `-n 0`), successful subcommand execution, or help display. | -| 1 | Error: script exited non-zero, validation failure, invalid `goto` target, missing workflow, missing script, missing `.loopx/` directory, install failure, or usage error. | -| 128+N | Interrupted by signal N (e.g., 130 for SIGINT). | +| 1 | Error: script exited non-zero, validation failure, invalid `goto` target, missing workflow, missing script, missing `.loopx/` directory, `LOOPX_TMPDIR` creation failure, child launch / spawn failure after tmpdir creation, install failure (including `npm install` non-zero exit or spawn failure under section 10.10, and `.gitignore` safeguard failure under section 10.10), or usage error. | +| 128+N | Interrupted by signal N (e.g., 130 for SIGINT). When `LOOPX_TMPDIR` has been created, cleanup runs before exit per section 7.4. A signal received during `npm install` causes loopx to wait for the npm child to exit before exiting with the signal's code (section 10.10). Cleanup warnings do not affect the exit code or the programmatic outcome. 
| -Usage errors (exit code 1) include: `loopx run` with no target, `loopx run ralph bar` (extra positional), `loopx foo` (unrecognized subcommand), `loopx ralph` (unrecognized subcommand — no implicit fallback to `run`), `loopx --unknown` (unrecognized top-level flag), `loopx -n 5 ralph` (top-level `-n`), `loopx -e .env ralph` (top-level `-e`), `loopx run --unknown ralph` (unrecognized run flag), `loopx run -n 5 -n 10 ralph` (duplicate run flag), `loopx install` with no source, `loopx install -w a -w b <source>` (duplicate install flag), and `loopx install --unknown <source>` (unrecognized install flag). +Usage errors (exit code 1) include: `loopx run` with no target, `loopx run ralph bar` (extra positional), `loopx run -- ralph` (unrecognized `--`), `loopx run -n 1 -- ralph` (unrecognized `--`), `loopx run ralph -- name=value` (unrecognized `--`), `loopx run ralph adr=0003` (extra positional — `name=value` is not interpreted as a named argument), `loopx foo` (unrecognized subcommand), `loopx ralph` (unrecognized subcommand — no implicit fallback to `run`), `loopx --unknown` (unrecognized top-level flag), `loopx -n 5 ralph` (top-level `-n`), `loopx -e .env ralph` (top-level `-e`), `loopx run --unknown ralph` (unrecognized run flag), `loopx run -n 5 -n 10 ralph` (duplicate run flag), `loopx install` with no source, `loopx install -w a -w b <source>` (duplicate install flag), `loopx install --no-install --no-install <source>` (duplicate install flag), and `loopx install --unknown <source>` (unrecognized install flag). -Invalid target strings (e.g., `loopx run ":script"`, `loopx run "workflow:"`, `loopx run "a:b:c"`, `loopx run ""`) are also exit code 1 but are not usage errors — they are rejected after discovery, at the same point as a missing workflow or missing script (section 4.1).
+Invalid target strings (e.g., `loopx run ":script"`, `loopx run "workflow:"`, `loopx run "a:b:c"`, `loopx run ""`, `loopx run adr=0003` parsed as a target) are also exit code 1 but are not usage errors — they are rejected after discovery, at the same point as a missing workflow or missing script (section 4.1). Note: A non-zero exit code from any script causes loopx to exit with code 1. Scripts that need error resilience should handle errors internally and exit 0. @@ -985,11 +1210,17 @@ Note: A non-zero exit code from any script causes loopx to exit with code 1. Scr ## 13. Summary of Reserved and Special Values -| Name | Context | Purpose | -|------|---------|---------| -| `LOOPX_BIN` | Env variable | Resolved realpath of the effective loopx binary (post-delegation) | -| `LOOPX_PROJECT_ROOT` | Env variable | Absolute path to the project root (section 3.2) | -| `LOOPX_WORKFLOW` | Env variable | The name of the workflow containing the currently executing script | -| `LOOPX_DELEGATED` | Env variable | Set to `1` during delegation to prevent recursion | -| `index` | Convention | Default entry point script name within a workflow | -| `:` | Delimiter | Reserved separator between workflow and script names in target strings | +Reserved env-var names play one of two distinct roles. **Script-protocol-protected** names are injected on every child spawn and silently override any user-supplied value at the section 8.3 top tier. **Startup-reserved-only** names are inspected on loopx's own startup but are not injected into per-spawn script environments and are not protected from user override on the child. 
+ +| Name | Context | Role | Purpose | +|------|---------|------|---------| +| `LOOPX_BIN` | Env variable | Script-protocol-protected | Resolved realpath of the effective loopx binary (post-delegation) | +| `LOOPX_PROJECT_ROOT` | Env variable | Script-protocol-protected | Absolute path to the project root (section 3.2) | +| `LOOPX_WORKFLOW` | Env variable | Script-protocol-protected | The name of the workflow containing the currently-spawned script. Refreshed per-spawn. | +| `LOOPX_WORKFLOW_DIR` | Env variable | Script-protocol-protected | Absolute path of the workflow directory containing the currently-spawned script (section 6.1). Refreshed per-spawn alongside `LOOPX_WORKFLOW`. | +| `LOOPX_TMPDIR` | Env variable | Script-protocol-protected | Absolute path to the run-scoped temporary directory (section 7.4). Stable for the lifetime of the run. | +| `LOOPX_DELEGATED` | Env variable | Startup-reserved only | Inspected on loopx's own startup as a delegation recursion guard (section 3.2). Set on the inner loopx process during project-root delegation, not on every script spawn. Env files, inherited `process.env`, and `RunOptions.env` may supply this name to spawned scripts; it is not protected at the section 8.3 top tier. | +| `index` | Convention | — | Default entry point script name within a workflow | +| `:` | Delimiter | — | Reserved separator between workflow and script names in target strings | + +**`PWD` is intentionally not reserved.** loopx neither sets nor unsets `PWD`; the value loopx's own process holds is inherited unchanged. `PWD` is outside the script-protocol-protected tier, outside the startup-reserved tier, and outside section 8.3's protocol-variable tier. See sections 6.1 and 8.3. 
diff --git a/TEST-SPEC.md b/TEST-SPEC.md index 14ff1c0..e7dc6df 100644 --- a/TEST-SPEC.md +++ b/TEST-SPEC.md @@ -27,19 +27,25 @@ ### 1.3 Coverage Scope -This suite is the **implementation-driving** test suite — it defines the behavior that must pass before a feature is considered complete. It targets complete coverage of all SPEC.md requirements, including: +**Platform scope: POSIX-only (macOS, Linux).** Per SPEC 1, v1 is POSIX-only and Windows is not supported. This test suite is correspondingly **POSIX-only by design** — tests exercise POSIX behaviors (process groups, `getcwd(3)` canonicalization, `realpath`, signal disposition, `os.tmpdir()` under `TMPDIR` semantics, `stat(3)` device/inode identity, unix file modes, symlinks, shell-managed `$PWD`, etc.) and are not expected to pass on Windows. CI runs POSIX matrix only (macOS, Linux × Node.js ≥ 20.6 and Bun ≥ 1.0). No Windows coverage is targeted or advertised. If a test happens to pass on Windows it is incidental, not a guarantee; if it fails, that is not a bug against this suite. + +This suite is the **implementation-driving** test suite — it defines the behavior that must pass before a feature is considered complete. It targets **near-complete coverage** of SPEC.md requirements — complete except for explicitly marked known gaps that depend on privileged operations unavailable in standard CI or on narrow timing windows for which no test-only seam is currently declared. 
The known gaps are enumerated in-line at the relevant sections (mount-point non-detection T-TMP-43 and host-crash cleanup non-guarantee T-TMP-44, and the cleanup-safety symlink-`unlink`-failure branch within creation-failure handling under `LOOPX_TEST_TMPDIR_FAULT=mode-secure-fail` — the partial directory created by `mkdtemp` is never a symlink, so cleanup-safety rule-2 dispatch is unreachable in that composition without an additional swap-to-symlink seam between identity capture and the cleanup-safety dispatch — see section 4.7; the pre-handler-installation signal-window clause T-SIG-26, the narrow post-tmpdir-pre-spawn signal window T-SIG-29, the programmatic pre-iteration abort window T-API-10i, and the partial-tmpdir-creation-racing-signal/abort window T-SIG-30b / T-API-10j — see section 4.11; implementation-private install staging-directory cleanup T-INST-79a — see section 4.10) and again in the Appendix A traceability matrix's "Known gaps" annotations. Previously-tracked seam-dependent gaps (tmpdir creation sub-steps T-TMP-12d / T-TMP-12e, cleanup-failure branches T-TMP-40 / T-TMP-41 / T-TMP-42, cleanup-failure during creation-failure handling T-TMP-12d2 / T-TMP-12e2 / T-TMP-12e3, cleanup idempotence / warning cardinality T-TMP-38 / T-TMP-39 / T-TMP-38a / T-TMP-38b / T-TMP-38c / T-TMP-38d / T-TMP-38d2 / T-TMP-38e, terminal-outcome precedence races T-TERM-01..T-TERM-04 across both observation orders including the `.throw()` axis T-TERM-02c / T-TERM-02d, and the auto-install aggregate-report-already-emitted carve-out T-INST-116l) have been promoted to direct conformance pins via the section 1.4 fault-injection seams `LOOPX_TEST_TMPDIR_FAULT` (extended with the `identity-capture-fail-rmdir-fail` compound value), `LOOPX_TEST_CLEANUP_FAULT`, and the pause sentinels `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` (extended with `consumer-return-observed`, `consumer-throw-observed`, and `child-spawn-attempt` window values) and `LOOPX_TEST_AUTOINSTALL_PAUSE` (the last extended 
with a `post-aggregate-report` ordinal value). Beyond those tracked gaps, the suite's coverage includes: - **Spec 3.1 (Global Install):** Covered by T-INST-GLOBAL-01, which exercises the full `npm pack` → install into isolated global prefix → run against fixture project workflow. This runs in CI on every build. -- **Spec 7.3 (Signal Handling — between iterations):** Covered by T-SIG-07, which sends a signal between iterations by coordinating via marker files. Tagged `@flaky-retry(3)` due to inherent timing sensitivity. The active-child signal cases (T-SIG-01–06) are fully covered without retry. +- **Spec 7.3 (Signal Handling — between iterations):** Covered by T-SIG-07 (immediate-exit contract) and T-SIG-07a (tmpdir cleanup also runs in the between-iterations window — SPEC 7.3 / 7.4 "When no child is active, cleanup runs immediately"), both of which target the between-iterations window by (a) waiting for iteration 1's child PID to exit via polled `kill(pid, 0)` returning ESRCH, and then (b) delivering the signal before iteration 2's long-sleeping child has emitted its ready marker. Tagged `@flaky-retry(3)` due to the narrow residual race between iteration 2's fork-exec and its sleep installation — no between-iterations sentinel seam is declared, so this window is observed via PID-exit polling rather than a deterministic marker. The active-child signal cases (T-SIG-01–06) are fully covered without retry. - **Spec 9.1 (Async Generator Cancellation):** Multiple cancellation scenarios are tested: "break after yield" (T-API-06), "return during pending next" (T-API-09a), "abort signal during active child" (T-API-10a), "pre-aborted signal" (T-API-10b), and "abort between iterations" (T-API-10c).
+**Test-construction sanity for getter/proxy variants.** Several SPEC 9.5 "snapshot-time throws" tests assert that loopx captures a throwing enumerable getter or `Proxy` `ownKeys` trap during the synchronous option-snapshot pass and surfaces the captured exception via the standard pre-iteration error path — i.e., the exception originates **inside loopx**, not at the test call site. Because object-spread (`{ ...obj, foo }`) reads the spread source's own enumerable string-keyed properties at the spread expression, **spreading an object whose getter or proxy trap is the behavior under test invokes the throwing accessor in the test harness before `run()` / `runPromise()` is called**, which surfaces the exception at the test call site rather than letting loopx capture it. That short-circuit invalidates the assertion — the test would then be exercising the test harness's own evaluation order, not the SPEC 9.5 snapshot path. **Construction rule for getter / proxy variants**: build the final options object directly — install the throwing accessor on the same object passed to `run()` / `runPromise()` using `Object.defineProperty`, or assemble the options field-by-field — **never via object-spread of an object whose getters or proxy traps are the behavior under test**. The same rule applies to spreading an env value object whose entry getter throws or whose proxy `ownKeys` trap throws: pass the env object as the `env` field's value (a plain reference read), not as a spread source whose enumeration triggers the trap. Tests covering this surface (T-API-55c, T-API-55d, T-API-62, T-API-62e, T-API-62f, T-API-65f, T-API-65g, T-API-65p, T-API-65q, and others) use the field-by-field / `Object.defineProperty` construction pattern. + ### 1.4 Internal Test Seams -The implementation **must** expose the following pure functions as package-private imports for unit and fuzz testing. 
These are not part of the public API and are not documented in SPEC.md — they exist solely to enable high-volume testing. +All items in this section are **test-only harness seams** — they are **non-public**, **required for this test suite**, and **not part of SPEC conformance**. A production consumer of loopx must never depend on them, and their presence or shape is not part of the public semver contract. They exist solely to enable high-volume fuzz testing (section 5.1, 5.2) and deterministic coverage of failure paths that are otherwise unreachable in black-box tests. -**Required exports (via a subpath like `loopx/internal` or a `src/internal.ts` barrel):** +**Pure-function exports (non-public, test-only):** + +The test suite expects the following pure functions to be importable via a subpath like `loopx/internal` or a `src/internal.ts` barrel. The exact module path and export mechanism are implementation details (TypeScript `paths` aliases, a `package.json` `exports` subpath, or a direct relative import are all acceptable) — the only requirement is that the tests can reach them. | Function | Signature | Purpose | |----------|-----------|---------| @@ -47,24 +53,118 @@ The implementation **must** expose the following pure functions as package-priva | `parseEnvFile` | `(content: string) => { vars: Record<string, string>, warnings: string[] }` | Parses `.env` file content per Spec 8.1 rules. Returns parsed variables and any warning messages for invalid lines | | `classifySource` | `(source: string) => { type: "git" \| "tarball", url: string }` | Classifies an install source per Spec 10.1 rules. Throws for rejected sources (e.g., single-file URLs) | -**Design constraints:** +**Design constraints (for the test suite's use):** -- These functions must be **pure** — no I/O, no process spawning, no side effects. They take a string and return a value.
- The `warnings` field in `parseEnvFile` returns the warning messages that would be printed to stderr during normal operation. This allows unit tests to assert on warning behavior without capturing stderr from a child process. -- The exact module path and export mechanism is an implementation detail, but the test suite must be able to `import { parseOutput } from "loopx/internal"` (or equivalent). The implementation may use TypeScript `paths` aliases, a `package.json` `exports` subpath, or a direct relative import from the test files. -- These exports are **not** part of the public semver contract. They may change shape between minor versions. +- These exports are **not** part of the public semver contract. SPEC.md does not reference or constrain them; they exist solely for this test suite. -**These seams are a hard implementation requirement.** Unit tests (section 6.1, 6.2) and high-volume fuzz tests (section 5.1, 5.2) depend on them. The implementation is not considered complete until these exports are available and importable by the test suite. +These seams are what unit tests (section 6.1, 6.2) and high-volume fuzz tests (section 5.1, 5.2) import. Without them those tests fall back to slow E2E coverage or cannot exist at all — but a SPEC-conforming implementation without these exports still satisfies SPEC.md. -**Install fault injection (environment variable seam):** +**Install fault injection (test-only env-var seam, non-public, not part of SPEC conformance):** -Install atomicity tests (T-INST-80c) must simulate commit-phase failures that are otherwise unreproducible in black-box tests (the commit phase involves only renames, which rarely fail on a healthy filesystem). 
The implementation must honor the following test-only environment variable: +Install atomicity tests (T-INST-79, T-INST-80b, T-INST-80c, T-INST-80c2, T-INST-97a), auto-install safeguard tests (T-INST-112c, T-INST-112d, T-INST-112g, T-INST-112h, T-INST-112i, T-INST-112j, T-INST-112k), per-workflow auto-install spawn-failure tests (T-INST-114b), and auto-install no-active-child signal-window tests (T-INST-116h, T-INST-116i, T-INST-116j, T-INST-116k) must simulate failures or pause loopx at deterministic points that are otherwise unreproducible in black-box tests (staging-phase failures on healthy filesystems depend on archive-shape / permission tricks that are not SPEC-required to fail; commit-phase renames rarely fail on a healthy filesystem; `.gitignore` write failures depend on non-deterministic post-commit filesystem state; `lstat` failures other than `ENOENT` on the top-level `.gitignore` path depend on non-deterministic post-commit filesystem state and parent-directory permission state that are not reachable from ordinary install inputs; pre-existing symlink, FIFO, and socket `.gitignore` entries cannot be produced through ordinary install inputs because SPEC 10.11 materializes source symlinks before commit and ordinary install fixtures cannot place FIFOs or sockets at `.loopx//.gitignore`; pre-existing regular `.gitignore` entries with mode `000` depend on the implementation's staging-strategy preservation of source mode bits, which SPEC 10.7 does not require; selective per-invocation `npm` spawn failures cannot be driven by a `PATH`-based shim once a shim process has started; signal delivery during the auto-install pass when no `npm install` child is active hits sub-millisecond windows between safeguard / spawn / wait operations that cannot be deterministically targeted via real-time delays). 
The test suite assumes the implementation honors the following test-only environment variables: | Variable | Values | Behavior | |----------|--------|----------| | `LOOPX_TEST_INSTALL_FAULT` | `commit-fail-after:<n>` | During the install commit phase, simulate a rename failure (reported as `EACCES`) after `n` successful workflow commits. | +| `LOOPX_TEST_INSTALL_FAULT` | `staging-fail:<workflow>[,<workflow>…]` | During the install staging phase (section 10.7 stage-then-commit), simulate a staging-write failure (reported as `EACCES`) for the named workflow(s) before any bytes are written to the staging directory for that workflow. This is the staging-phase counterpart to `commit-fail-after` and is what drives the T-INST-79 / T-INST-80b / T-INST-97a staging-failure coverage deterministically. SPEC 10.7 defines the failure outcome ("the staging directory is cleaned up and `.loopx/` is left unchanged. The install fails with an error identifying the failing workflow and the underlying cause"); the seam decouples that outcome from a specific source-shape fixture (e.g., tarball mode-000 files). Without this seam, the only black-box path is a mode-000 file, and SPEC does not require mode-000 to cause staging to fail — an implementation that streams from the archive, chmods extracted files before copying, or otherwise avoids a literal copy of the unreadable file is conforming and would not fail staging. Multiple values combine with `commit-fail-after` in a single `LOOPX_TEST_INSTALL_FAULT` value by listing them semicolon-separated (e.g., `staging-fail:alpha;commit-fail-after:1`) — the exact combination grammar is implementation-defined, but at minimum each fault type must be usable on its own. | +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-write-fail:<workflow>[,<workflow>…]` | During the post-commit auto-install pass, simulate a `.gitignore` write failure (reported as `EACCES`) for the named workflow(s).
The safeguard-write hook that section 10.10 describes as "Immediately before spawning `npm install`" checks the comma-separated list and, for any match, returns the `EACCES` write-failure path without actually attempting to create `.gitignore` on disk. This enables deterministic coverage of T-INST-112c without relying on real filesystem permissions (which vary across CI environments and are unreachable under `process.getuid() === 0`). | +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `npm-spawn-fail:[,…]` | During the post-commit auto-install pass, simulate an `npm` spawn failure (reported as `ENOENT`) for the named workflow(s). The spawn hook that section 10.10 describes as "`npm install` spawn failure (most commonly: `npm` is not on `PATH`)" checks the comma-separated list and, for any match, returns the spawn-failure path without actually invoking `spawn("npm", …)`. Multiple values combine in a single `LOOPX_TEST_AUTOINSTALL_FAULT` value by listing them semicolon-separated (e.g., `gitignore-write-fail:alpha;npm-spawn-fail:beta`) — the exact combination grammar is implementation-defined, but at minimum each fault type must be usable on its own. This enables deterministic per-workflow coverage of SPEC 10.10 "spawn failure on one workflow does not prevent `npm install` on subsequent workflows" (T-INST-114b), which a black-box `PATH`-based seam cannot drive: once a shim process has started, the spawn itself has already succeeded, so a shim cannot make only the second `spawn("npm")` fail with `ENOENT` while allowing the first and third to succeed. 
| +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-replace-with-symlink:<workflow>[=<shape>][,<workflow>[=<shape>]…]` | During the post-commit auto-install pass for the named workflow(s), immediately **after** the commit phase has placed any source `.gitignore` content (per SPEC 10.11 materialization) and **before** the per-workflow safeguard `lstat`, replace `.loopx/<workflow>/.gitignore` (or create the path if it does not exist) with a symlink so that the safeguard's `lstat` observes a symlink entry. The optional `<shape>` selects which symlink shape is materialized: `regular-file-target` (default — symlink whose target is a regular file inside the workflow directory containing `node_modules`, exercising the "even if it resolves to a regular file" clause), `broken` (symlink to a non-existent target inside the workflow directory), or `cycle` (two symlinks `.gitignore -> .gitignore-loop` and `.gitignore-loop -> .gitignore` forming a cycle). Multiple values combine semicolon-separated with the other `LOOPX_TEST_AUTOINSTALL_FAULT` faults (e.g., `gitignore-replace-with-symlink:alpha=broken;npm-spawn-fail:beta`). This enables deterministic coverage of T-INST-112g — the symlink branch of SPEC 10.10's non-regular `.gitignore` enumeration ("A symlink named `.gitignore` does not satisfy the safeguard, even if it resolves to a regular file. Broken and cyclic symlinks are treated as symlink entries and therefore fail the safeguard") — which is otherwise unreachable through ordinary install inputs because SPEC 10.11 materializes source symlinks as regular files / directories before commit, so a committed symlink at `.loopx/<workflow>/.gitignore` cannot be produced by any black-box install fixture. | +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-lstat-fail:<workflow>[,<workflow>…]` | During the post-commit auto-install pass for the named workflow(s), simulate an `lstat` failure other than `ENOENT` (reported as `EACCES`) when the safeguard inspects the top-level `.gitignore` path.
The `lstat` hook that section 10.10 describes as "loopx checks the top-level `.gitignore` path using `lstat`" checks the comma-separated list and, for any match, returns the `EACCES` `lstat`-failure path without actually invoking `lstat` on disk. Multiple values combine semicolon-separated with the other `LOOPX_TEST_AUTOINSTALL_FAULT` faults (e.g., `gitignore-lstat-fail:alpha;npm-spawn-fail:beta`). This enables deterministic coverage of T-INST-112h — the SPEC 10.10 "`lstat` failure other than `ENOENT`" branch of the safeguard-failure enumeration — which is otherwise unreachable through ordinary install inputs (a non-`ENOENT` `lstat` failure on a workflow-root entry depends on parent-directory permission state and other non-deterministic filesystem conditions that are not reproducible across CI environments and are unreachable under `process.getuid() === 0`). | +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-replace-with-fifo:[,…]` | During the post-commit auto-install pass for the named workflow(s), immediately **after** the commit phase has placed any source `.gitignore` content (per SPEC 10.11 materialization) and **before** the per-workflow safeguard `lstat`, replace `.loopx//.gitignore` (or create the path if it does not exist) with a FIFO (named pipe, e.g., via `mkfifo(2)`) so that the safeguard's `lstat` observes a FIFO entry. Multiple values combine semicolon-separated with the other `LOOPX_TEST_AUTOINSTALL_FAULT` faults. This enables deterministic coverage of T-INST-112i — the FIFO branch of SPEC 10.10's non-regular `.gitignore` enumeration ("directory, symlink, FIFO, socket, or other non-regular entry") — which is otherwise unreachable through ordinary install inputs because git transports do not preserve FIFOs and tarball transports cannot carry FIFO entries through standard archive formats. 
| +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-replace-with-socket:[,…]` | During the post-commit auto-install pass for the named workflow(s), immediately **after** the commit phase has placed any source `.gitignore` content and **before** the per-workflow safeguard `lstat`, replace `.loopx//.gitignore` (or create the path if it does not exist) with a Unix-domain socket (e.g., via `bind(2)` on an `AF_UNIX` socket bound at the path) so that the safeguard's `lstat` observes a socket entry. Multiple values combine semicolon-separated with the other `LOOPX_TEST_AUTOINSTALL_FAULT` faults. This enables deterministic coverage of T-INST-112j — the socket branch of SPEC 10.10's non-regular `.gitignore` enumeration — which is otherwise unreachable through ordinary install inputs (sockets, like FIFOs, are not preserved through git or tarball transports). The seam may keep the socket bound through the safeguard `lstat` and unbind it after the test asserts on the entry's persistence; whether the seam closes the socket immediately after creating the on-disk entry is implementation-defined as long as the entry itself is observable as a socket via `lstat` at the moment the safeguard runs. | +| `LOOPX_TEST_AUTOINSTALL_FAULT` | `gitignore-make-unreadable:[,…]` | During the post-commit auto-install pass for the named workflow(s), immediately **after** the commit phase has placed any source `.gitignore` content (per SPEC 10.11 materialization) and **before** the per-workflow safeguard `lstat`, `chmod` `.loopx//.gitignore` to mode `000` — but **only when the on-disk entry is a regular file** (the seam is a no-op when the entry is absent or non-regular, so it composes safely with the `gitignore-replace-with-symlink:` / `-fifo:` / `-socket:` seams without colliding on the same workflow name). Multiple values combine semicolon-separated with the other `LOOPX_TEST_AUTOINSTALL_FAULT` faults (e.g., `gitignore-make-unreadable:alpha;npm-spawn-fail:beta`). 
This enables deterministic coverage of T-INST-112k — the SPEC 10.10 regular-file branch's "loopx does not read, parse, validate, append to, or inspect the file's contents or **permissions**" clause — which is otherwise unreachable through ordinary install inputs (producing a committed regular `.gitignore` with mode `000` requires either tarball install with mode preservation through the staging-then-commit pipeline, which SPEC 10.7 does not require, or post-commit chmod after the file lands; without the seam the test is environment-sensitive and unreachable under `process.getuid() === 0`). **Mode-restoration responsibility.** The seam **does not restore** the file's mode bits — loopx leaves the on-disk `.gitignore` at mode `000` when its own auto-install pass completes, so that T-INST-112k can assert post-run that the mode bits remain `000` (proving loopx never chmod'd the file readable in order to inspect its contents or permissions). The test harness is responsible for restoring the file mode after its own assertions complete: T-INST-112k's `afterEach` (or post-assertion teardown) must `chmod` the file back to a readable value before fixture-cleanup logic attempts to read or remove it, otherwise fixture cleanup will fail with `EACCES` under `process.getuid() !== 0`. This split — loopx leaves mode `000`, harness restores in `afterEach` — is the cleanest division: it lets the test assert directly on the post-run on-disk mode bits without racing an implementation-side restore, while keeping fixture cleanup reliable across CI environments. | + +These variables are only checked when `NODE_ENV=test` to prevent accidental use in production. Like the function seams above, they are not part of the public API and may change between versions without a semver bump. -This variable is only checked when `NODE_ENV=test` to prevent accidental use in production. Like the function seams above, this is not part of the public API and may change between versions. 
+**Tmpdir creation fault injection (test-only env-var seam, non-public, not part of SPEC conformance):** + +The tmpdir-creation sub-step coverage tests (T-TMP-12d, T-TMP-12e) need to deterministically force `mkdtemp` to succeed while a subsequent sub-step of SPEC 7.4's creation order (identity capture or mode securing) fails. Black-box reproduction is unreliable because the same-process owner is performing the operation on a directory it just created and still has read permission on (so `stat` won't fail), and the same process also sets the directory's mode (so `chmod` won't fail). The test suite assumes the implementation honors the following test-only environment variable: + +| Variable | Values | Behavior | +|----------|--------|----------| +| `LOOPX_TEST_TMPDIR_FAULT` | `identity-capture-fail` | After `mkdtemp` succeeds during `LOOPX_TMPDIR` creation (SPEC 7.4 sub-step 1), simulate a failure of the identity-capture stat (sub-step 2) — reported as `EACCES`. The implementation observes the simulated failure at the same point an actual `stat` failure would be observed, so the SPEC 7.4 "single non-recursive `rmdir`" cleanup path runs on the partial directory created by `mkdtemp`. This enables deterministic coverage of T-TMP-12d. | +| `LOOPX_TEST_TMPDIR_FAULT` | `identity-capture-fail-rmdir-fail` | Compound fault: both identity capture (SPEC 7.4 sub-step 2) and the subsequent single non-recursive `rmdir` on the partial directory fail with `EACCES`. The identity-capture failure observes at the same point as the `identity-capture-fail` value above, so loopx enters the SPEC 7.4 "single non-recursive `rmdir`" creation-failure cleanup path; the `rmdir` then itself observes a simulated `EACCES`. 
This enables deterministic coverage of T-TMP-12d2 — SPEC 7.4's "If `rmdir` fails, loopx emits a single stderr warning and leaves the path in place" branch on the creation-failure-cleanup-failure axis, which is otherwise unreachable because a same-user-owned empty directory cannot be reliably made un-`rmdir`-able through ordinary file-system inputs. | +| `LOOPX_TEST_TMPDIR_FAULT` | `mode-secure-fail` | After `mkdtemp` and identity capture both succeed, simulate a failure of mode-securing (sub-step 3) — reported as `EACCES`. The implementation observes the simulated failure at the same point an actual `chmod` / mode-verification failure would be observed, so the SPEC 7.4 "full identity-fingerprint cleanup-safety routine" runs on the partial directory. This enables deterministic coverage of T-TMP-12e. **Combined with `LOOPX_TEST_CLEANUP_FAULT`.** `LOOPX_TEST_CLEANUP_FAULT` applies to the cleanup-safety routine regardless of whether it was invoked from a normal terminal-cleanup path or from creation-failure handling, so when set in the same run as `mode-secure-fail` the cleanup-safety routine invoked under `mode-secure-fail` observes the configured cleanup-failure when its dispatch reaches the corresponding rule. **Reachability of each cleanup-fault under `mode-secure-fail`:** `LOOPX_TEST_CLEANUP_FAULT=lstat-fail` is reachable — the top-level `lstat` is the first step of the cleanup-safety routine and runs unconditionally, so the seam fires (used by T-TMP-12e3). `LOOPX_TEST_CLEANUP_FAULT=recursive-remove-fail` is reachable — the partial directory's identity matches the recorded fingerprint (loopx just `mkdtemp`'d it and captured its identity in sub-step 2), so dispatch reaches rule 4 and the recursive-remove proceeds (used by T-TMP-12e2). 
`LOOPX_TEST_CLEANUP_FAULT=symlink-unlink-fail` is **not** reachable in this composition — the partial tmpdir is the directory created by `mkdtemp` in sub-step 1, never a symlink, so cleanup-safety dispatch never reaches rule 2 and the seam is a no-op; deterministic coverage of the symlink-unlink-fail branch within creation-failure handling would require a separate seam that swaps the partial directory for a symlink between sub-step 2 and the cleanup-safety dispatch, which is not provided. | + +Multiple values combine semicolon-separated, but only one fault per run is meaningful (the implementation reaches at most one of the two failure points before aborting tmpdir creation). This variable is only honored when `NODE_ENV=test`. Like the install-fault seams above, it is not part of the public API. + +**Cleanup fault injection (test-only env-var seam, non-public, not part of SPEC conformance):** + +SPEC 7.4 specifies three cleanup-failure branches that black-box tests cannot reliably reproduce on a same-user-owned directory under POSIX semantics: (1) the top-level `lstat` returning a non-ENOENT failure, (2) the rule-2 symlink `unlink` failing, and (3) the rule-4 recursive removal failing on an identity-matched directory. The test suite assumes the implementation honors the following test-only environment variable: + +| Variable | Values | Behavior | +|----------|--------|----------| +| `LOOPX_TEST_CLEANUP_FAULT` | `lstat-fail` | During cleanup of `LOOPX_TMPDIR`, simulate the top-level `lstat` returning a non-ENOENT failure (reported as `EACCES`). The cleanup routine observes the simulated failure at the same point an actual `lstat` failure would be observed, so the SPEC 7.4 "single stderr warning, no further changes" path runs. This enables deterministic coverage of T-TMP-40. 
| +| `LOOPX_TEST_CLEANUP_FAULT` | `symlink-unlink-fail` | During cleanup of `LOOPX_TMPDIR` when the dispatch decided the path is a symlink (per SPEC 7.4 cleanup-safety rule 2), simulate the `unlink` call failing with `EACCES`. The cleanup routine observes the simulated failure at the same point an actual `unlink` failure would be observed, so the SPEC 7.4 "single stderr warning, no further changes" path runs. The seam fires only when the dispatch reached rule 2 — for non-symlink paths the seam is a no-op. This enables deterministic coverage of T-TMP-41. | +| `LOOPX_TEST_CLEANUP_FAULT` | `recursive-remove-fail` | During cleanup of `LOOPX_TMPDIR` when the dispatch decided the path is an identity-matched directory (per SPEC 7.4 cleanup-safety rule 4), simulate the recursive removal failing with `EACCES`. The exact failure point within the recursive walk is implementation-defined (e.g., a synthetic failure at the first nested `unlink`, or at the top-level `rmdir` that closes out the walk); the seam guarantees the cleanup routine observes a failure on the recursive-remove path in the same way an actual recursive-remove failure would surface. The seam fires only when the dispatch reached rule 4 — for other paths the seam is a no-op. This enables deterministic coverage of T-TMP-42. The seam applies whenever the cleanup-safety routine is invoked, regardless of whether the invocation comes from a normal terminal cleanup path (T-TMP-42) or from the SPEC 7.4 creation-failure handling path under `LOOPX_TEST_TMPDIR_FAULT=mode-secure-fail` (T-TMP-12e2). | + +Multiple values combine semicolon-separated, but only one cleanup fault per run is meaningful (the dispatch reaches at most one of the three failure points for any given cleanup invocation). This variable is only honored when `NODE_ENV=test`. 
+ +**Pre-iteration sentinel stderr marker (test-only, non-public, not part of SPEC conformance):** + +The CLI pre-iteration signal-wins tests (T-SIG-20, T-SIG-20a, T-SIG-21, T-SIG-22, T-SIG-23, T-SIG-24, T-SIG-25, T-SIG-27, T-SIG-28) need to synchronize signal delivery with the narrow window between the CLI's pre-iteration signal-handler installation and the first child spawn. To avoid relying on fragile real-time delays or fixture-triggered blocking `opendir`s, the test suite assumes loopx emits an implementation-defined sentinel line to stderr at pre-iteration start — for example `loopx: pre-iteration-begin` — immediately after installing its pre-iteration signal handlers and before starting discovery, env-file loading, target resolution, or tmpdir creation. The harness uses `waitForStderr()` to observe the line and then sends SIGINT / SIGTERM during the remaining pre-iteration window. + +**Sentinel gate.** The sentinel must be **opt-in via a dedicated env var** — `LOOPX_TEST_PREITERATION_SENTINEL=1` (set in loopx's inherited environment by the harness, only for the pre-iteration signal-wins tests listed above). It must **not** be enabled by `NODE_ENV=test` alone, because Vitest sets `NODE_ENV=test` for every test in the suite and many tests assert empty / no-warning stderr — emitting the sentinel under `NODE_ENV=test` alone would leak the marker line into every test's stderr capture and break clean-stderr assertions. The dedicated env var is opt-in per-test, so only the pre-iteration signal-wins tests opt in. The fault-injection seams in this section (`LOOPX_TEST_INSTALL_FAULT`, `LOOPX_TEST_AUTOINSTALL_FAULT`, `LOOPX_TEST_TMPDIR_FAULT`, `LOOPX_TEST_CLEANUP_FAULT`) remain `NODE_ENV=test`-gated because they alter behavior only when their value is set; the sentinel, by contrast, is purely additive stderr output that would appear in every `NODE_ENV=test` run if gated on `NODE_ENV=test` alone, so it requires the stricter dedicated-flag gate.
+The sentinel's exact text is implementation-defined but must be **parent-discoverable** without relying on reading the child's `process.env` from the parent (the parent test process does not observe mutations the child makes to its own `process.env`). The harness uses, in order of preference: + +- **(a) Fixed sentinel string under the dedicated flag.** The implementation commits to a fixed sentinel string emitted whenever `LOOPX_TEST_PREITERATION_SENTINEL=1` is set in loopx's inherited environment (and `NODE_ENV=test`, as a defense-in-depth gate against accidental production opt-in). The fixed string is documented in the implementation's test-harness README. The harness hard-codes the same fixed string as the `waitForStderr(...)` argument. This is the preferred discovery path because no additional process is spawned. +- **(b) Internal test-only exported constant.** The implementation exports the sentinel text as a package-internal constant (for example, on the `loopx/internal` subpath described in the "pure-function exports" table above). The harness imports the constant directly from the built package and uses its value as the `waitForStderr(...)` argument. Discovery does not require the env var; emission still does. +- **(c) Dedicated test-harness env-dump path.** The implementation provides a test-only entry point (a subcommand, flag, or binary) that, when invoked under `NODE_ENV=test` and `LOOPX_TEST_PREITERATION_SENTINEL=1`, prints the sentinel text to its own stdout and exits 0 without running the loop. The harness invokes this preflight once, captures stdout, and reuses the result across pre-iteration signal tests. Crucially, the harness observes the sentinel from the child's **stdout** (or the exported constant), not from `process.env`. + +Paths (a), (b), and (c) are functionally equivalent for the harness: each makes the sentinel text parent-observable. Paths (b) and (c) additionally avoid hard-coding an implementation-specific string into the test files, whereas path (a) relies on the harness and the implementation agreeing on the documented fixed string.
If an implementation prefers none of the above, the harness falls back to a short real-time delay before signaling, at the cost of increased flakiness. + +- The sentinel is only emitted when **both** `NODE_ENV=test` **and** `LOOPX_TEST_PREITERATION_SENTINEL=1` are set in loopx's inherited environment; production builds and the rest of the test suite do not see it. The pre-iteration signal-wins tests listed above are the only callers that set the dedicated flag; every other test (including the rest of the signal block, the tmpdir block, and the env-file block) leaves it unset and observes clean stderr unaffected by the sentinel. +- The sentinel is **not** a SPEC requirement — a SPEC-conforming implementation without this marker still satisfies SPEC.md. Tests that depend on the marker remain tagged `@flaky-retry(3)` as a fallback for environments where the sentinel is unavailable or where harness timing drifts; without the sentinel they can fall back to a short real-time delay before signaling, at the cost of increased flakiness. 
+ +**Auto-install pause sentinel (test-only env-var seam, non-public, not part of SPEC conformance):** + +The auto-install no-active-child signal-window tests (T-INST-116h, T-INST-116i, T-INST-116j, T-INST-116k) need to deliver SIGINT / SIGTERM during one of four narrow windows in the post-commit auto-install pass when no `npm install` child is currently active: (1) **between sequential workflows** (after one workflow's auto-install processing has fully completed and before the next workflow's safeguard begins); (2) **during `.gitignore` safeguard work before spawning npm** (after the safeguard `lstat` dispatch decides to synthesize / accept the existing `.gitignore` and before the spawn call); (3) **after one npm child exits and before the next workflow begins processing** (the gap between the previous workflow's `npm install` completing and the next workflow's safeguard `lstat`); and (4) **after a `.gitignore` safeguard failure but before the next workflow** (the safeguard short-circuit path that records the failure in the aggregate report and skips that workflow's `npm install`). These windows are sub-millisecond on a healthy host and cannot be deterministically targeted via real-time delays. SPEC 10.10 specifies that auto-install runs "sequentially in an implementation-defined order" and explicitly notes the cross-workflow order is not an external conformance contract — an implementation that processes a multi-workflow source in any deterministic order is conforming. 
Tests that need to target a specific window therefore cannot hard-code a workflow name (e.g., `between-workflows:beta`) without baking in an unspecified ordering; instead the seam exposes **ordinal** window values (e.g., `between-workflows-after-first`) that resolve to "whichever workflow loopx happens to process first in its implementation-defined auto-install order," and emits a parent-observable marker file naming that workflow so the test can assert on the observed processed/current/remaining sets rather than on hard-coded names. The test suite assumes the implementation honors the following test-only environment variable: + +| Variable | Values | Behavior | +|----------|--------|----------| +| `LOOPX_TEST_AUTOINSTALL_PAUSE` | `<window>` (ordinal form) **or** `<window>:<workflow>` (name-targeted form) | During the post-commit auto-install pass, when loopx reaches the named window, pause execution for a bounded interval (≥ 2 seconds and ≤ 10 seconds — long enough for the harness to deliver a signal, short enough to bound test runtime if the harness fails to deliver one) and then resume. 
**Recognized ordinal window values** (do not take a workflow-name suffix; resolve to "whichever workflow is at the named position in the implementation's auto-install order"): `between-workflows-after-first` (just after the *first* workflow's auto-install processing has fully completed, regardless of which workflow that was, and before any further workflow's safeguard begins), `pre-spawn-first` (during `.gitignore` safeguard work for the *first* workflow, after the safeguard dispatch decides what to do and before the `spawn("npm", …)` call), `post-exit-first` (just after loopx has observed the *first* workflow's `npm install` child exit **and recorded that workflow's auto-install terminal outcome, including any non-zero-exit aggregate failure entry**, and before any further per-workflow processing begins — i.e., the accumulator already reflects the just-exited child's success or non-zero-exit failure when the pause begins), `post-safeguard-failure-first` (just after the *first* safeguard failure for any workflow has been recorded in the aggregate report and before any further per-workflow processing begins), `post-spawn-failure-first` (just after the *first* `npm install` spawn failure for any workflow — `spawn("npm", …)` rejecting before producing a child process, e.g., the SPEC 10.10 "`npm install` spawn failure (most commonly: `npm` is not on `PATH`)" branch — has been recorded in the aggregate report and before any further per-workflow processing begins; structurally distinct from `post-exit-first` (which fires after a successfully-spawned child *exits*) and `post-safeguard-failure-first` (which fires after a per-workflow safeguard short-circuit before any spawn was attempted) — under this window no npm child has run, but the per-workflow safeguard for the failing workflow already completed before the failed spawn so its `.gitignore` synthesis side effects are on disk, and the aggregate accumulator already holds an `npm-install-spawn-failure` entry for the 
failing workflow), and `post-aggregate-report` (just after loopx has completed / flushed the stderr write for the final auto-install aggregate failure report — flushed as far as the runtime stream API supports for the underlying handle, since stderr is commonly a pipe in tests and `fsync(2)` on a pipe is not portable — and before loopx's process exits; used by T-INST-116l to exercise the SPEC 10.10 "unless it had already been emitted" carve-out half of the signal-termination clause; the seam fires only when an aggregate report is actually emitted, i.e., when the auto-install pass accumulated at least one failure entry, and is a no-op on a clean / no-failure auto-install pass). **Recognized name-targeted window values** (require a workflow-name suffix; useful for tests that compose with name-targeted FAULT seams or that otherwise need to pin the pause to a specific workflow without depending on the implementation-defined auto-install order — typically only sensible when a single-workflow source is used or when the targeted workflow is guaranteed to exist regardless of order): `between-workflows:` (just after the previous workflow's auto-install processing completed and before the named workflow's safeguard begins; the named workflow is the *upcoming* workflow), `pre-spawn:` (during `.gitignore` safeguard work for the named workflow, after the safeguard dispatch decides what to do and before the `spawn("npm", …)` call), `post-exit:` (just after loopx has observed the named workflow's `npm install` child exit **and recorded that workflow's auto-install terminal outcome, including any non-zero-exit aggregate failure entry**, and before any further per-workflow processing for the next workflow begins — i.e., the accumulator already reflects the just-exited child's success or non-zero-exit failure when the pause begins; the named workflow is the *just-completed* workflow), `post-safeguard-failure:` (just after the safeguard failure for the named workflow has been 
recorded in the aggregate report and before any further per-workflow processing for the next workflow begins; the named workflow is the workflow whose safeguard *just failed*), and `post-spawn-failure:` (just after the named workflow's `npm install` spawn failure has been recorded in the aggregate report and before any further per-workflow processing for the next workflow begins; the named workflow is the workflow whose spawn *just failed*). When the harness delivers SIGINT / SIGTERM during the pause, loopx observes the signal at that point in the auto-install state machine; when no signal arrives within the bounded interval, loopx resumes normally and the test reports a harness timing failure. Only one window value is honored per run; if the value is malformed (unknown window name, missing `:` for name-targeted forms, missing workflow name for name-targeted forms), the seam is a no-op. The bounded resume-after-timeout behavior is essential: without it, a harness bug or a lost signal could deadlock the run indefinitely. **Parent-observable pause marker (required).** The companion environment variable `LOOPX_TEST_AUTOINSTALL_PAUSE_MARKER` (also gated on `NODE_ENV=test`) names an absolute file path; when set together with `LOOPX_TEST_AUTOINSTALL_PAUSE`, loopx writes a UTF-8 JSON marker file at that absolute path **immediately on entry to the pause and before the bounded delay begins**, then `fsync`s and closes the file before starting the delay timer. The harness must place this path **outside** any directory loopx may clean up (in particular, outside `$LOOPX_TMPDIR`) so the marker survives loopx's exit for post-run reads. The harness sets `LOOPX_TEST_AUTOINSTALL_PAUSE_MARKER` to a path under the test's own temp directory. 
The marker contents have the shape `{"window": "<window>", "current": "<workflow>", "processed": ["<workflow>", …], "remaining": ["<workflow>", …]}` where (a) `window` echoes the resolved window value (e.g., `between-workflows-after-first` or `pre-spawn:beta`); (b) `current` is the workflow name at the seam's focal position — for `between-workflows-after-first` / `between-workflows:`, the *upcoming* workflow whose safeguard would run next; for `pre-spawn-first` / `pre-spawn:`, the workflow whose pre-spawn safeguard work the seam paused inside; for `post-exit-first` / `post-exit:`, the workflow whose `npm install` child just exited; for `post-safeguard-failure-first` / `post-safeguard-failure:`, the workflow whose safeguard just failed; for `post-spawn-failure-first` / `post-spawn-failure:`, the workflow whose `npm install` spawn just failed; (c) `processed` is the array of workflow names whose full auto-install processing reached terminal state (success or recorded failure) **before** `current`'s focal point in the auto-install order, excluding `current`; and (d) `remaining` is the array of workflow names whose processing has not started at pause-entry, also excluding `current`. The union of `processed`, `[current]`, and `remaining` equals the set of workflows the auto-install pass would process in the implementation's auto-install order. **For the `post-aggregate-report` ordinal value** the per-workflow focal-position fields do not apply (all per-workflow processing has reached a terminal state by the time the report is emitted), so the marker is written with `current` set to `null`, `processed` set to the full set of workflow names whose processing reached a terminal outcome (success or recorded failure) during the pass, and `remaining` set to the empty array; tests that depend on this window read only `window` and ignore the per-workflow fields. 
Because the bounded-delay timer starts only **after** the marker is written and `fsync`'d, a harness that polls for the marker file's existence and then signals is guaranteed to deliver the signal during the bounded delay (subject to ordinary scheduler jitter). If `LOOPX_TEST_AUTOINSTALL_PAUSE_MARKER` is unset or names a non-writable path, the seam still pauses for the bounded interval but the marker is not written; tests that depend on the marker MUST set the variable. | + +This variable is only honored when **both** `NODE_ENV=test` **and** the variable itself is set with a recognized window value; production builds and the rest of the test suite do not see any pause. Like the function and AUTOINSTALL_FAULT seams above, it is not part of the public API and may change between versions without a semver bump. + +The pause seam is purely additive (it does not change the auto-install state machine's logic, only inserts a wall-clock delay and a marker write at one named point) and is only triggered on tests that explicitly opt in via the env-var value. Other tests in the suite — including the active-child signal tests T-INST-116 through T-INST-116g — leave it unset and observe normal auto-install timing. + +**Terminal-trigger pause sentinel (test-only env-var seam, non-public, not part of SPEC conformance):** + +The cleanup-idempotence and warning-cardinality tests (T-TMP-38, T-TMP-39) and the terminal-outcome precedence tests (T-TERM-01, T-TERM-02, T-TERM-03, T-TERM-04) need to deterministically order or interleave concurrent terminal triggers — outcomes whose timing is sub-millisecond on a healthy host and cannot be reproduced via real-time delays. SPEC 7.2's "first terminal trigger observed by loopx determines the surfaced outcome among genuinely racing triggers" rule depends on **loopx's own observation order**, which black-box tests cannot pin to a specific resolution without a coordination seam. 
The test suite assumes the implementation honors the following test-only environment variable: + +| Variable | Values | Behavior | +|----------|--------|----------| +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `cleanup-start` | At the entry of the `LOOPX_TMPDIR` cleanup routine — before any `lstat` / `unlink` / recursive-remove call — pause execution for a bounded interval (≥ 2 seconds, ≤ 10 seconds) and then resume. Used by T-TMP-38 (cleanup idempotence under racing terminal triggers — the harness delivers a second signal during the pause to exercise "at most one cleanup attempt"), T-TMP-39 (warning cardinality across racing terminals — same race, asserts at most one cleanup-related stderr warning), and T-TERM-03 (signal during cleanup of a prior signal — surfaced exit code reflects the *first* signal observation, not the second). | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `child-exit-handler` | After a child-process exit is observed by the implementation (via `child.on("exit", …)` or equivalent) and **after** the implementation has recorded the child-exit terminal trigger as the first-observed trigger but **before** loopx dispatches the resulting terminal outcome to the surface (CLI exit code / `run` throw / `runPromise` rejection), pause for the bounded interval. Used by T-TERM-01 to deterministically pin the child-exit trigger as first-observed: the harness fires a competing abort during the post-observation pause; on resume, loopx must surface the child-exit outcome (because it was observed first), not the abort error (which arrived later). | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `abort-listener` | After the captured `AbortSignal`'s abort listener fires and **after** the implementation has recorded the abort as the first-observed terminal trigger but **before** loopx dispatches the abort outcome, pause for the bounded interval. 
Used by T-TERM-02 to pin the abort trigger as first-observed: the harness invokes `.return()` / `.throw()` (the competing trigger) during the post-observation pause; on resume, loopx must surface the abort outcome (first-observed), not a `.return()` / `.throw()` settlement. | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `child-spawn-failure` | After a `spawn` rejection is observed by the implementation and **after** the implementation has recorded the spawn-failure as the first-observed terminal trigger but **before** loopx dispatches the spawn-failure outcome, pause for the bounded interval. Used by T-TERM-04 variant a to pin the spawn-failure trigger as first-observed: the harness fires a competing abort during the pause; on resume, loopx must surface the spawn-failure outcome, not the abort error. | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `child-spawn-attempt` | After loopx has decided to spawn a child for the next iteration and has begun the spawn attempt (e.g., entered the `child_process.spawn` / equivalent runtime call path) but **before** loopx has observed the spawn outcome (success → child-exit pathway, or failure → spawn-failure trigger), pause for the bounded interval. The seam fires only on the spawn-attempt path (not on the post-observation paths covered by `child-exit-handler` and `child-spawn-failure`). Used by T-TERM-04 variant b to deterministically separate the spawn-attempt window from competing abort observation: the harness configures a fixture whose spawn will fail (e.g., NUL-in-`RunOptions.env` per T-API-57), then fires `c.abort()` during the pause. Because the abort is delivered while loopx is paused mid-spawn-attempt and **before** loopx has observed the spawn outcome, on resume loopx's abort-listener observes the abort first and the spawn-failure later. The surfaced outcome must reflect the abort (first-observed), not the spawn failure. 
Without this seam, T-TERM-04 variant b cannot deterministically distinguish the residual "first-observed wins among genuinely racing triggers" rule (SPEC 7.2) from the explicit "abort wins over pre-iteration failures" rule (covered separately by T-API-65–65o), because abort delivered before the spawn attempt begins pre-empts the spawn entirely under the explicit precedence rule. | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `consumer-return-observed` | After loopx's consumer-cancellation tracking observes a mid-loop `gen.return(value)` call (post-first-`next()`) and **after** the implementation has recorded the `.return()` as the first-observed terminal trigger but **before** loopx dispatches the resulting clean settlement, pause for the bounded interval. The seam fires only on mid-loop `.return()` (not on the pre-first-`next()` carve-out, which never enters loopx's machinery; not on `.throw()`). Used by T-TERM-02 variant b to pin the `.return()` trigger as first-observed: the harness drives `gen.next()` once (entering the loop), waits for the script's "ready" marker, then calls `gen.return(undefined)`; loopx observes the `.return()` and pauses; the harness fires `c.abort()` during the pause; on resume, loopx must surface the clean `.return()` settlement (per standard async-generator semantics — first-observed trigger wins), not an abort error. | +| `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE` | `consumer-throw-observed` | Counterpart to `consumer-return-observed` for the `.throw()` axis. After loopx observes a mid-loop `gen.throw(err)` call (post-first-`next()`) as the first-observed terminal trigger but before dispatch, pause for the bounded interval. Used by tests that need to pin `.throw()` as the first-observed trigger against a competing abort or non-zero exit, with the same parent-observable marker contract as the other window values. 
| + +When the harness delivers the second trigger (signal, abort, `.return()`, `.throw()`, or whatever competes with the first-observed) during the pause, loopx observes it *after* the first-observed trigger is already pinned; the surfaced outcome must reflect the first-observed trigger, not the late one. When no second trigger arrives within the bounded interval, loopx resumes normally and the pinned-first trigger drives the outcome (the test reports a harness timing failure if it expected a race). Only one window value is honored per run; if the value is malformed (unknown window name), the seam is a no-op. The bounded resume-after-timeout behavior is essential: without it, a harness bug or a lost trigger could deadlock the run indefinitely. + +**Parent-observable pause marker (required).** The companion environment variable `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE_MARKER` (also gated on `NODE_ENV=test`) names an absolute file path; when set together with `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE`, loopx writes a UTF-8 JSON marker file at that absolute path **immediately on entry to the pause and before the bounded delay begins**, then `fsync`s and closes the file before starting the delay timer. The harness must place this path **outside** any directory loopx may clean up (in particular, outside `$LOOPX_TMPDIR`) so the marker survives loopx's exit for post-run reads. The marker contents have the shape `{"window": "<window>"}`, where `window` echoes the honored window value; additional fields may be added in future without breaking compatibility, and tests should ignore unknown fields. Because the bounded-delay timer starts only **after** the marker is written and `fsync`'d, a harness that polls for the marker file's existence and then triggers the second event is guaranteed to deliver the trigger during the bounded delay (subject to ordinary scheduler jitter). 
If `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE_MARKER` is unset or names a non-writable path, the seam still pauses for the bounded interval but the marker is not written; tests that depend on the marker MUST set the variable. + +This variable is only honored when **both** `NODE_ENV=test` **and** the variable itself is set with a recognized window value; production builds and the rest of the test suite do not see any pause. Like the auto-install pause seam above, it is not part of the public API and may change between versions without a semver bump. The pause seam is purely additive (it does not change the terminal-trigger dispatch logic, only inserts a wall-clock delay between trigger observation and outcome dispatch, and a marker write) and is only triggered on tests that explicitly opt in via the env-var value. + +**Cleanup-warning structured marker (test-only stderr seam, non-public, not part of SPEC conformance):** + +Because SPEC 7.4 leaves the cleanup-warning text implementation-defined, "exactly one cleanup-related warning" / "no cleanup-related warning" / "at most one cleanup-related warning" assertions across the suite (T-TMP-35*, T-TMP-36*, T-TMP-38*, T-TMP-40, T-TMP-41, T-TMP-42, T-TMP-42a, T-TMP-42b, T-TMP-42c, T-TERM-03, T-INST-116l, etc.) need an **implementation-neutral detection predicate** rather than a fixed text match. Many of these tests run on **mixed-stderr terminals** (creation-failure errors alongside cleanup warnings — T-TMP-12d2 / T-TMP-12e2 / T-TMP-12e3; script-failure terminals — T-TMP-35c / T-TMP-35d / T-TMP-35e / T-TMP-42a; abort terminals — T-TMP-42b; signal terminals — T-TMP-35f / T-TMP-42c; racing-trigger warning-cardinality — T-TMP-38 / T-TMP-39 / T-TMP-38a / T-TMP-38a2 / T-TMP-38b / T-TMP-38c / T-TMP-38d / T-TMP-38d2 / T-TMP-38e), where the non-cleanup stderr is **not** a caller-controlled fixture marker but an implementation-defined loopx-emitted error whose text varies between conforming implementations. 
Inferred-residue counting (subtract known fixture markers, count what's left) cannot reliably distinguish a cleanup warning from an implementation-defined creation-failure or script-failure error in those cases. The harness therefore requires a **structured cleanup-warning marker** as a test-only seam. + +When `NODE_ENV=test`, every cleanup-warning emission also emits a fixed structured marker line to stderr alongside the human-readable warning text. The marker is purely additive (does not change SPEC-conformant warning emission) and is gated on `NODE_ENV=test` so production builds never emit it. The marker line follows the shape `LOOPX_TEST_CLEANUP_WARNING\t<remainder>` — a fixed leading prefix `LOOPX_TEST_CLEANUP_WARNING` followed by a tab and an implementation-defined remainder (e.g., a JSON payload, a path mention, or any other diagnostic data the implementation finds useful for test-time inspection). The harness counts only lines beginning with `LOOPX_TEST_CLEANUP_WARNING\t` as cleanup-warning emissions; the human-readable warning text alongside the marker is not part of the cardinality count. + +**Required, not optional.** Unlike the pre-iteration sentinel above (which offers discovery paths a/b/c and, failing those, a real-time-delay fallback with documented flakiness), the cleanup-warning marker is **required** for cleanup-warning cardinality tests because mixed-stderr terminals offer no caller-controlled disambiguation. An implementation that does not emit the marker under `NODE_ENV=test` cannot reliably pass cleanup-warning cardinality tests. The marker is **not part of the public SPEC contract** — like the rest of the seams in this section (`LOOPX_TEST_INSTALL_FAULT`, `LOOPX_TEST_TMPDIR_FAULT`, `LOOPX_TEST_CLEANUP_FAULT`, `LOOPX_TEST_AUTOINSTALL_PAUSE`, `LOOPX_TEST_TERMINAL_TRIGGER_PAUSE`, `LOOPX_TEST_PREITERATION_SENTINEL`), it is a test-only harness contract that may change between versions without a semver bump. 
+ +**Discovery.** The fixed prefix `LOOPX_TEST_CLEANUP_WARNING` is documented in the implementation's test-harness README and hard-coded into the harness. As an alternative to a hard-coded prefix, the implementation may export the prefix as a package-internal constant on a `loopx/internal` subpath (analogous to the pre-iteration sentinel paths above), in which case the harness imports the constant directly. The exact discovery path is implementation-defined; the harness's only requirement is parent-discoverable, parent-observable marker text. + +**Negative assertions.** The negative form ("no cleanup-related warning" — used by T-TMP-12a "no path exists, so no cleanup is needed" and similar zero-warning paths) inverts the predicate: assert that zero `LOOPX_TEST_CLEANUP_WARNING\t…` lines appear on stderr. + +**Disambiguation from non-cleanup stderr.** Because the marker is keyed on a fixed prefix, the harness can disambiguate the cleanup warning from other stderr categories in the same run regardless of the implementation-defined wording of either: a creation-failure error in T-TMP-12d2 (loopx-emitted, implementation-defined text, no marker prefix) is not counted as a cleanup warning even when it appears alongside one (with the marker prefix). The script-failure marker line `SCRIPT-FAILURE-MARKER-T-TMP-35C` (caller-controlled) is also not counted because it does not carry the cleanup-warning prefix. Tests no longer need to specify "subtract known fixture markers, then count residue" — they simply count `LOOPX_TEST_CLEANUP_WARNING\t…` lines. 
--- @@ -110,7 +210,7 @@ tests/ types.test.ts Compile-time type surface verification helpers/ cli.ts CLI spawning utilities (runCLI, runCLIWithSignal) - api-driver.ts Programmatic API driver (runAPIDriver) + api-driver.ts Programmatic API driver (runAPIDriver, runAPIDriverLive) fixtures.ts Temp dir, workflow, script, and project creation servers.ts Local HTTP & git servers, git URL rewriting env.ts Env file creation, global config, isolated home @@ -198,9 +298,93 @@ const result = await runAPIDriver("bun", ` const outputs = JSON.parse(result.stdout); ``` -#### `runCLIWithSignal(args, options): Promise` +#### `runAPIDriverLive(runtime, code, options?): APIDriverHandle` + +Like `runAPIDriver`, but **does not return a settled `Promise`** — it returns a live driver-process handle so the test can observe mid-run events (stderr markers, stdout markers, the driver's PID) and deliver out-of-band signals or coordination messages while the driver script (and its loopx loop) is still executing. A settled promise is incompatible with mid-run synchronization patterns required by several programmatic-API signal / cancellation tests (T-API-10d / 10e / 10f / 10g / 10h, the `runPromise` signal counterparts T-API-10d-promise / T-API-10e-promise, and other tests where the harness must `waitForStderr("ready")` and read a PID from a marker before calling `c.abort()` / `gen.return()` / etc.). Without this helper, those tests would have to embed all synchronization inside the driver script itself, which prevents the harness from reading the child's stderr or PID at the synchronization point. + +This is the programmatic-API counterpart to `runCLIWithSignal` for CLI tests. + +```typescript +interface APIDriverHandle { + /** Send a signal to the driver process (which forwards into loopx via standard runtime signal disposition). Returns once the signal has been delivered from the harness. 
*/ + sendSignal(signal: "SIGINT" | "SIGTERM" | "SIGKILL" | NodeJS.Signals): void; + /** Resolves when the driver's stderr emits a line matching `pattern`. Rejects on timeout or driver exit. The driver's stderr captures script stderr (per Spec 9.3 stderr passthrough) and any harness-instrumentation lines the driver script writes itself. */ + waitForStderr(pattern: string | RegExp, options?: { timeoutMs?: number }): Promise<void>; + /** Resolves when the driver's stdout emits a line matching `pattern`. Rejects on timeout or driver exit. The driver's stdout is where the test driver script prints its own JSON results (e.g., yielded outputs) — useful for tests that need to observe partial output before the loop completes. */ + waitForStdout(pattern: string | RegExp, options?: { timeoutMs?: number }): Promise<void>; + /** Resolves with the final `{ stdout, stderr, exitCode }` once the driver exits. Idempotent. */ + waitForExit(): Promise<{ stdout: string; stderr: string; exitCode: number }>; + /** The driver child's process ID (available once spawn succeeds). */ + pid: number; +} +``` + +The driver script itself is responsible for surfacing per-iteration markers the harness needs (e.g., emitting `"ready"` to stderr after `gen.next()` yields, or printing the active child's PID via a fixture-supplied marker file). The harness reads markers via `waitForStderr` / `waitForStdout` or via filesystem polling on a fixture-written marker file, then triggers the next coordination step (e.g., `sendSignal`, or a follow-up file-system mutation). -Like `runCLI`, but also returns a `sendSignal(signal)` function and a `waitForStderr(pattern)` function so the test can send SIGINT/SIGTERM at a controlled point during execution. +Typical usage: + +```typescript +// T-API-10d-style: aborting an active-child run via runPromise(). +// The driver script awaits runPromise() but is structured so the abort can +// be delivered from outside the driver process. 
+const handle = runAPIDriverLive("node", ` + import { runPromise } from "loopx"; + // The fixture's child script writes its PID to a marker file and "ready" + // to stderr; the harness reads the marker, then sends a signal to *this* + // driver process to trigger the abort. + process.on("SIGUSR1", () => globalThis.__loopxAbortController?.abort()); + globalThis.__loopxAbortController = new AbortController(); + try { + const outputs = await runPromise("ralph", { + cwd: "${project.dir}", + signal: globalThis.__loopxAbortController.signal, + }); + console.log(JSON.stringify({ ok: true, outputs })); + } catch (e) { + console.log(JSON.stringify({ ok: false, error: String(e) })); + } +`, { cwd: project.dir }); + +await handle.waitForStderr("ready"); +const childPid = readPidMarker(); // fixture-supplied marker +handle.sendSignal("SIGUSR1"); // triggers c.abort() inside driver +const { stdout, exitCode } = await handle.waitForExit(); +const result = JSON.parse(stdout); +expect(result.ok).toBe(false); +expect(result.error).toMatch(/abort/i); +expect(processIsAlive(childPid)).toBe(false); +``` + +Tests may alternatively choose to deliver coordination via a file-system sentinel the driver script polls, or via a per-driver-process IPC channel — the helper is agnostic to the in-driver synchronization mechanism, but it must expose `waitForStderr` / `waitForStdout` / `waitForExit` / `pid` / `sendSignal` so the harness can observe child markers and trigger out-of-band actions before the driver settles. + +#### `runCLIWithSignal(args, options): CLIProcessHandle` + +Like `runCLI`, but **does not return a settled `Promise`** — it returns a live process handle so the test can observe mid-run events (stderr markers, child state) and send signals while loopx is still executing. A settled `Promise` is incompatible with mid-run signaling, which is why this helper exposes a handle object instead of a promise. 
+ +```typescript +interface CLIProcessHandle { + /** Send a signal to the loopx process. Returns once the signal has been delivered from the harness. */ + sendSignal(signal: "SIGINT" | "SIGTERM" | "SIGKILL" | NodeJS.Signals): void; + /** Resolves when stderr emits a line matching `pattern` (string substring or RegExp). Rejects on timeout or child exit. */ + waitForStderr(pattern: string | RegExp, options?: { timeoutMs?: number }): Promise<void>; + /** Resolves when stdout emits a line matching `pattern` (string substring or RegExp). Rejects on timeout or child exit. Used by streaming-passthrough tests (e.g., T-INST-119a) to observe bytes before the child exits. */ + waitForStdout(pattern: string | RegExp, options?: { timeoutMs?: number }): Promise<void>; + /** Resolves with the final `CLIResult` once loopx exits. Idempotent — calling twice returns the same settlement. */ + waitForExit(): Promise<CLIResult>; + /** The child's process ID (available once spawn succeeds). */ + pid: number; +} +``` + +Typical usage: + +```typescript +const handle = runCLIWithSignal(["run", "ralph"], { cwd: project.dir }); +await handle.waitForStderr("ready"); +handle.sendSignal("SIGINT"); +const result = await handle.waitForExit(); +expect(result.exitCode).toBe(130); +``` #### `createEnvFile(path, vars): void` @@ -253,6 +437,251 @@ await withGitURLRewrite({ }); ``` +#### `withFakeNpm(options, fn): Promise` + +Creates a throw-away directory containing a shim executable named `npm` and prepends that directory to `PATH` for the duration of `fn`. The shim is used by Spec 10.10 auto-install tests to make `npm install` deterministic and observable without depending on real npm. Options control the shim's behavior: + +```typescript +interface FakeNpmOptions { + // Default exit code the shim returns. 0 = success; non-zero = npm install failure. + exitCode?: number; // default 0 + // Per-workflow exit-code overrides, keyed by the workflow name (derived from + // the shim's cwd basename under `.loopx/`). 
Example: `{ beta: 1 }` makes + // `npm install` exit non-zero only for `.loopx/beta/`, regardless of the + // auto-install-order position of `beta` among the processed workflows. Workflows + // not listed fall back to `exitCode`. Used by T-INST-114 (fail on a single + // named workflow in a multi-workflow install). Workflow-name keying rather + // than invocation-order keying is required because SPEC 10.10 explicitly + // makes the cross-workflow auto-install order implementation-defined and + // not an external conformance contract — so an invocation-position key + // (`{ 2: 1 }`) would tacitly bake an unspecified ordering assumption into + // the test. + exitCodeByWorkflow?: Record<string, number>; + // If true, the helper constrains PATH for `fn` so `npm` resolution fails + // with ENOENT and no shim is created. Used by T-INST-114a for the + // all-invocations spawn-failure case. + // + // Note on per-invocation spawn failure (T-INST-114b): a black-box fake `npm` + // shim cannot reliably make only a specific `spawn("npm")` call fail with + // ENOENT while other calls succeed, because once a shim process starts, the + // spawn itself has already succeeded. Per-workflow spawn-failure coverage is + // driven through the `LOOPX_TEST_AUTOINSTALL_FAULT=npm-spawn-fail:` + // implementation seam (section 1.4), not through this helper. A + // per-invocation spawn-failure field is therefore intentionally omitted from + // the helper — `spawnFailure` covers only the all-invocations case — and + // selective per-invocation spawn failure is routed through the seam instead. + spawnFailure?: boolean; // default false + // Optional stdout/stderr the shim writes before exiting. + stdout?: string; + stderr?: string; + // If set (global), the shim sleeps this many seconds before exiting — used by + // signal-during-npm-install tests so the harness can send a signal while + // the child is active. Combined with the ready-protocol: on start the shim + // writes its PID to `pidFile` (if set), then writes "ready" to stderr, then + // sleeps. 
Tests that care only about "signal while the first + // workflow is installing, subsequent workflows must not be processed" can + // use `sleepSeconds` alone because the first invocation is observable + // regardless of which workflow it targets. + sleepSeconds?: number; + // Per-workflow sleep overrides, keyed by the workflow name (derived from the + // shim's cwd basename under `.loopx/`). Example: `{ alpha: 30 }` makes the + // shim sleep only when invoked in `.loopx/alpha/`, regardless of + // auto-install-order position. Workflows not listed fall back to + // `sleepSeconds` (or exit immediately if `sleepSeconds` is unset). + // Workflow-name keying avoids the same implementation-defined-ordering + // issue described for `exitCodeByWorkflow`. + sleepByWorkflow?: Record<string, number>; + // If set, the shim installs a Bash `trap '' <SIGNAL>` clause for each listed + // signal before entering the sleep, so the shim ignores those signals and + // stays alive past the normal termination path. Used by T-INST-116c to force + // the 5-second grace period + SIGKILL escalation path — the shim traps SIGTERM + // (and optionally SIGINT), so loopx must escalate to SIGKILL to kill the + // child after the grace window. This mirrors the `signal-trap-ignore` fixture + // in section 2.4 applied to the npm shim context. + trapSignals?: Array<"TERM" | "INT">; + // If true, the shim spawns a long-lived background grandchild process + // (for example `sleep 3600 &`), writes that PID to `grandchildPidFile`, + // writes "ready" to stderr, then waits. Used by T-INST-116e to verify + // that loopx forwards signals to the npm child's process group, not only + // to the npm shim process itself. + spawnGrandchild?: boolean; + grandchildPidFile?: string; + // If set, the shim writes its own PID (`$$`) to this absolute file path as + // part of the ready protocol — **before** writing "ready" to stderr, and + // therefore before any optional `sleepSeconds` wait or `exitCode` exit. 
The harness reads this + // file (after `waitForStderr("ready")` resolves, the PID file is guaranteed + // present) to obtain the shim PID while the shim is still alive — required + // by tests that observe shim liveness mid-flight (e.g., `kill -0 <pid>`, + // active-child signal forwarding tests T-INST-116 / 116a / 116c / 116f, the + // streaming-passthrough tests T-INST-119a / T-INST-119a-stderr, and any test + // that needs the shim PID before the per-invocation log entry has been + // finalized at process exit). The per-invocation log entries written via + // `logFile` are appended **on shim exit** (each entry includes both + // `startedAtMs` and `endedAtMs`), so they are not observable while the shim + // is still alive — `pidFile` is the canonical alive-shim PID surface. The + // file is written via `printf '%s' "$$" > "<pidFile>"` so the contents are + // exactly the PID with no trailing newline; harness consumers parse a + // non-negative integer. Tests that need per-workflow PID files (e.g., one + // PID file per multi-workflow shim invocation) supply distinct paths + // through their fixture setup; the helper does not generate per-workflow + // paths automatically. + pidFile?: string; + // Optional relative file paths (resolved against the shim's cwd) to create + // before the shim sleeps and/or exits. Parent directories are created as + // needed. Used to simulate partial `node_modules/` state that loopx must not + // clean up after failed or interrupted auto-install runs (T-INST-116d, + // T-INST-117a). + createFiles?: string[]; + // Marker file path; every shim invocation appends one line of JSON + // describing its argv, cwd, and selected env vars (PATH, LOOPX_*, TMPDIR, + // ...) so the test can assert which workflows npm was invoked for, in + // what order, and with what environment. **The log entry is written on + // shim exit** — it includes both `startedAtMs` and `endedAtMs`, and is + // therefore not observable while the shim is still running. 
Tests that + // need the shim PID (or any other start-time field) while the shim is + // alive must use `pidFile` (and the stderr `"ready"` marker for + // synchronization), not the log file. + // + // **Signal-termination behavior (required).** "On shim exit" includes + // signal-induced termination by SIGINT or SIGTERM (the signals + // T-INST-116b / T-INST-116b2 / T-INST-116j / T-INST-116j2 etc. assert + // log-invocation counts against). The shim implementation must therefore + // install a Bash `trap` clause on `EXIT` (or equivalent) early in its + // preamble — before the optional `sleepSeconds` wait — that finalizes + // the per-invocation log entry regardless of whether the shim ran to + // completion or was interrupted by a signal-driven Bash exit. The trap + // captures `endedAtMs` at the moment of termination so the + // `startedAtMs <= endedAtMs` invariant holds; tests must not rely on + // byte-exact `endedAtMs` content under signal termination, only on the + // entry's existence and on the `argv` / `cwd` fields used for + // invocation-count and per-workflow-cwd assertions. **SIGKILL is + // intentionally not covered by this guarantee** (no userspace trap can + // run when a process is SIGKILL'd); tests in the SIGKILL-escalation + // path (T-INST-116c with `trapSignals: ["TERM", "INT"]` and the + // SIGINT-axis counterpart T-INST-116f) accordingly do **not** assert on + // the shim log — they assert on shim PID termination via `pidFile` + + // `kill -0` instead. Tests that need to assert "exactly N invocations + // ran before the signal" under SIGINT / SIGTERM termination read the + // exit-time log; tests that need mid-flight observation use `pidFile` + // and the stderr `"ready"` marker plus on-disk side effects (e.g., + // `.gitignore` synthesis state per T-INST-116b's clause (c)/(d)) so + // they do not depend on the exit-time log at all. 
+ logFile: string; + // If true, the shim records — as its very first action on startup, before + // any other side effect — whether `/.gitignore` existed at the moment + // of spawn, plus its content if present. Used by T-INST-112 to pin down the + // SPEC 10.10 ordering "Immediately before spawning `npm install`, loopx + // checks for a top-level `.gitignore` ... If none exists, loopx creates + // one containing the single line `node_modules`." The shim captures + // `fs.existsSync('.gitignore')` and the file's content as the very first + // line of its Bash preamble (before stdout/stderr emission, before + // `createFiles` actions, before `sleepSeconds` waits). The recorded state + // appears on each `FakeNpmInvocation` log entry as `gitignoreAtStart`. + // Without this option, the per-invocation log entry omits the + // `gitignoreAtStart` field (existing tests are unaffected). + recordGitignoreAtStart?: boolean; +} + +interface FakeNpmResult { + logFile: string; // same as options.logFile + readInvocations(): FakeNpmInvocation[]; // parsed log entries +} + +interface FakeNpmInvocation { + argv: string[]; // argv passed to npm + cwd: string; // shim's process.cwd() + env: Record; // selected env vars seen by shim + pid: number; + /** Wall-clock milliseconds (`Date.now()` in the shim) at which the shim + * started recording its invocation, captured **before** any optional + * `sleepSeconds` wait or `exitCode` exit. Used by tests that assert + * sequential / non-overlapping per-workflow processing per SPEC 10.10 + * ("sequentially in an implementation-defined order; npm children do + * not overlap") — e.g., T-INST-110 (two workflows alpha/beta both reach + * the shim) and T-INST-114 (three workflows alpha/beta/gamma all reach + * the shim). The recorded value is from the shim's clock, so + * cross-process clock-skew is not a concern within a single install + * command. 
*/ + startedAtMs: number; + /** Wall-clock milliseconds at which the shim finished and was about to + * exit. Combined with `startedAtMs`, lets tests verify that two + * per-workflow invocations did not overlap in wall-clock time + * (`a.endedAtMs <= b.startedAtMs` or vice versa). */ + endedAtMs: number; + /** Snapshot of the `.gitignore` file at the moment of spawn, captured as + * the first action in the shim's preamble before any other side effect. + * Present **only** when `FakeNpmOptions.recordGitignoreAtStart` is true; + * omitted otherwise. `existed` is `fs.existsSync('.gitignore')` at shim + * start; `content` is the file's full content as a string when `existed` + * is true (UTF-8 read, no trimming), or omitted when `existed` is false. + * This is the seam used by T-INST-112 to assert that loopx's `.gitignore` + * safeguard ran **before** spawning `npm install` per SPEC 10.10's + * "Immediately before spawning `npm install`" wording — a buggy + * implementation that ran npm first and synthesized `.gitignore` + * afterward would produce `existed: false` here while still leaving a + * correct end-state on disk. */ + gitignoreAtStart?: { + existed: boolean; + content?: string; + }; +} +``` + +The shim is implemented as a small Bash script (or a `tsx`-driven Node/Bun script when `npm` might need to behave differently per runtime). Tests that need both invocation recording *and* signal/ready synchronization combine `sleepSeconds` with the standard ready-protocol (`waitForStderr("ready")`) documented for the signal fixtures in section 2.4. + +**Raw-env recording for PWD-sensitive assertions (required).** The per-invocation log entry's `env` field records selected environment variables observed by the shim. For most variables, recording via the host shell's interpolated value (e.g., Bash `${VAR}` or Node `process.env.VAR`) yields the inherited value byte-for-byte. 
**The `PWD` variable is a special case under a Bash shim**: Bash rewrites its own `$PWD` on shell startup to match `getcwd(3)` whenever the inherited `PWD` does not refer to the current working directory, and because Bash exports `PWD`, the rewrite is also visible to subsequent `printenv PWD` / `env` reads of the shell's own environment. A Bash shim that logs `PWD` via `${PWD}` (or via `printenv PWD` / `env | grep PWD` after Bash has initialized) therefore records the **shell-rewritten** value, which equals the shim's own cwd (e.g., `.loopx/<workflow>/`) — not loopx's inherited `PWD` (e.g., `<project-root>`). A conforming implementation that correctly inherits loopx's `PWD` unchanged would falsely fail an assertion that compares the recorded `PWD` to loopx's inherited `PWD` byte-for-byte. The helper must therefore record `PWD` (and any other variable whose value is sensitive to shell-managed-`$PWD` semantics) via a **raw-env mechanism** that observes the process's inherited environment **before any shell-startup rewrite** — equivalently: a Node/Bun shim implementation reading `process.env.PWD` (Node and Bun do not rewrite the inherited `PWD` to `getcwd()` on startup); or, in a Bash shim, a direct read of the shim process's own `/proc/$$/environ` on Linux, performed with a shell builtin or redirection rather than an external reader, since that file still holds the environment block the process was started with. Spawning `env` / `/usr/bin/env` from the shim (including `exec env` on the first executable line) does **not** qualify: Bash performs its `PWD` reset during shell initialization, before any script line runs, and exports the result, so a child `env` process already sees the rewritten value. The recommended approach is a Node/Bun shim implementation when the test's invocation log requires a faithful `PWD` field. Tests that depend on byte-for-byte `PWD` parity with loopx's inherited environment (T-INST-115 specifically) MUST use this raw-env recording path; tests that record `PWD` only as a "field present, value not asserted byte-for-byte" observation are not constrained. + +This helper must also be able to make `npm` resolution fail deterministically +on hosts that already have a real `npm` installed. 
The all-invocations +`spawnFailure: true` option **must** cause `spawn("npm", …)` itself to +fail with `ENOENT` — i.e., the `spawn` call never produces a child +process. It is implemented by **PATH control / no resolvable `npm`** (the +helper rewrites `PATH` for the duration of `fn` so no `npm` binary is +findable, and creates no shim file). A dispatcher-shim implementation +that starts a process which then exits non-zero is **not** equivalent — +that is a non-zero exit, not a spawn failure, and would defeat +T-INST-114a's contract that the fake-npm log records zero invocations +(the shim never started). Selective per-invocation spawn failure (e.g., +failing only the 2nd of three workflow installs) is **intentionally not** +a helper option — per the `spawnFailure` field note above, that coverage +is routed through the `LOOPX_TEST_AUTOINSTALL_FAULT=npm-spawn-fail:` +test-only implementation seam (section 1.4), since a black-box shim cannot +make only a specific `spawn("npm")` call fail with ENOENT once the shim +process has already started. Any "`spawnFailureOnInvocation`" terminology +elsewhere should be read as referring to the seam, not to a helper option. + +This helper is the primary seam for Spec 10.10 auto-install coverage (`T-INST-110` block below). Without it, auto-install tests would be flaky (depending on real npm + registry availability) or impossible (`spawnFailure`, signal-during-npm-install, and `PATH`-controlled spawn behavior cannot be driven deterministically by the real `npm`). + +#### `withFakeGit(options, fn): Promise` + +Creates a throw-away directory containing a shim executable named `git` and prepends that directory to `PATH` for the duration of `fn`. The shim wraps the **real** `git` binary (resolved from a known absolute path, e.g., `/usr/bin/git`, to avoid recursing into the shim) so that git operations still succeed against local bare repos, while every invocation is recorded to a log file for assertion. 
Used by Spec 10.2 git install tests that need to observe the argv `loopx` passed to `git` — most importantly T-INST-87's assertion that `loopx install` invokes `git clone` with `--depth 1`. + +```typescript +interface FakeGitOptions { + // Marker log file; every shim invocation appends one line of JSON + // describing its argv, cwd, and selected env vars so the test can + // assert what subcommand, flags, and URL loopx passed. + logFile: string; + // Absolute path to the real git binary that the shim should exec. + // Defaults to `/usr/bin/git`. Tests may override for CI environments + // where git lives elsewhere. + realGitPath?: string; +} + +interface FakeGitResult { + logFile: string; + readInvocations(): FakeGitInvocation[]; +} + +interface FakeGitInvocation { + argv: string[]; // full argv including the `git` program name at [0] + cwd: string; + env: Record; + exitCode: number; // exit code of the real git invocation +} +``` + +The shim is a thin Bash wrapper: it appends an argv/cwd/env log entry, then `exec`s the real git with the original argv. This guarantees loopx's actual git interactions still succeed (clones into staging, etc.) while the test has a black-box-visible record of `git clone --depth 1 ` and similar. Without this seam, "clone has only 1 commit" is observable only by post-clone `git log` inspection of the final installed workflow directory — but installed workflows do not retain the `.git/` directory after the stage-then-commit copy, so post-install `git log` is not available. Asserting on the argv loopx passed to the cloning process is the correct black-box-visible assertion. + #### `forEachRuntime(fn): void` Test parameterization helper. Runs a test block once for each available runtime (Node.js, Bun). Skips a runtime if it's not installed. Example: @@ -283,21 +712,31 @@ A catalog of reusable fixture scripts used across tests. Each is a function that | `sleep-then-exit(seconds)` | bash | Sleeps for ``, then exits 0. 
General-purpose long-running script. For signal tests, prefer the dedicated `signal-*` fixtures which follow the ready-protocol. | | `write-env-to-file(varname, markerPath)` | bash | `printf '%s' "$VARNAME"` to a marker file. Uses `printf '%s'` (not `echo`) to avoid trailing newline and backslash interpretation. Observation via filesystem, not CLI stdout. | | `observe-env(varname, markerPath)` | ts | Writes JSON `{ "present": true, "value": "..." }` or `{ "present": false }` to a marker file using `fs.writeFileSync`. Distinguishes unset from empty string. Use instead of `write-env-to-file` when the test must differentiate between a variable being absent vs set to `""`. | -| `write-cwd-to-file(markerPath)` | bash | `printf '%s' "$PWD"` to a marker file. Uses `printf '%s'` (not `echo`) for exact-byte safety. | +| `write-cwd-to-file(markerPath)` | bash | `/bin/pwd -P` piped through `printf '%s'` to a marker file. Uses `/bin/pwd -P` (not `$PWD`) because Spec 6.1 explicitly declares `$PWD` non-authoritative for cwd assertions — `$PWD` is shell-maintained and inherits loopx's own `PWD`, so byte-for-byte equality between `$PWD` and the spawn cwd is not a loopx guarantee. `/bin/pwd -P` reports the kernel cwd spelling via `getcwd(3)`. | +| `observe-cwd-ts(markerPath)` | ts | Writes `process.cwd()` to a marker file via `fs.writeFileSync`. Use for cwd assertions in JS/TS contexts where Node/Bun's `getcwd(3)`-based `process.cwd()` is authoritative. Complements the Bash `write-cwd-to-file` fixture (which uses `/bin/pwd -P`). | +| `observe-pwd(markerPath)` | bash | `printf '%s' "${PWD:-}"` to a marker file. **Separate from `write-cwd-to-file`** — this fixture exists specifically to observe Bash's shell-managed `$PWD` variable (not the kernel cwd, and not the inherited env-map value), used by a narrow class of tests that specifically want to observe Bash's shell-level PWD. 
**Do not use for exact-value `PWD` passthrough assertions** — Bash may rewrite `$PWD` during shell startup under normal POSIX / Bash semantics, so byte-for-byte equality between Bash `$PWD` and a caller-supplied value is not a loopx guarantee (Spec 6.1 / 8.3 / 13). Tests that assert exact passthrough of `PWD` via the env-map tiers (T-PWD-01 / 02 / 03 / 06) use `observe-env` with variable name `PWD` instead, which reads `process.env.PWD` directly under Node / Bun and preserves the inherited value byte-for-byte. | | `write-value-to-file(value, markerPath)` | bash | `printf '%s' ''` to a marker file. Uses `printf '%s'` (not `echo`) for exact-byte safety — avoids trailing newlines, backslash interpretation, and issues with values starting with `-`. General-purpose observation helper. | | `stdout-writer(payloadFile)` | ts | Reads `payloadFile` from disk and writes its contents to stdout via `process.stdout.write()`. Used for fuzz and exact-byte output tests. | | `ts-output(fields)` | ts | Uses `import { output } from "loopx"` to emit structured output | | `ts-input-echo()` | ts | Reads input(), outputs it as result | | `ts-import-check()` | ts | Imports from "loopx", outputs success marker | | `signal-ready-then-sleep(markerPath)` | bash | Writes `$$` (the script's PID) to a marker file using `printf '%s'`, then writes `"ready"` to stderr, then sleeps indefinitely. The stderr marker allows the test harness to `waitForStderr("ready")` before sending a signal, ensuring the child is alive. | -| `signal-trap-exit(markerPath, delay)` | bash | Traps SIGTERM with a handler that sleeps for `` seconds then exits 0. On startup, writes `$$` to a marker file using `printf '%s'` and writes `"ready"` to stderr. Used for grace-period tests — delay < 5s tests clean exit, delay > 5s tests SIGKILL escalation. | -| `signal-trap-ignore(markerPath)` | bash | Traps SIGTERM and ignores it (handler is a no-op). 
On startup, writes `$$` to a marker file using `printf '%s'` and writes `"ready"` to stderr, then sleeps indefinitely. Used for SIGKILL-after-grace-period tests (T-SIG-05). | +| `signal-trap-exit(markerPath, delay, signal?)` | bash | Traps the named signal (`signal` defaults to `TERM`; supply `INT` for SIGINT-parameterized variants like T-SIG-04a) with a handler that sleeps for `` seconds then exits 0. On startup, writes `$$` to a marker file using `printf '%s'` and writes `"ready"` to stderr. Used for grace-period tests — delay < 5s tests clean exit, delay > 5s tests SIGKILL escalation. | +| `signal-trap-ignore(markerPath, signal?)` | bash | Traps the named signal (`signal` defaults to `TERM`; supply `INT` for SIGINT-parameterized variants like T-SIG-05a) and ignores it (handler is a no-op). On startup, writes `$$` to a marker file using `printf '%s'` and writes `"ready"` to stderr, then sleeps indefinitely. Used for SIGKILL-after-grace-period tests (T-SIG-05 / T-SIG-05a). | | `spawn-grandchild(markerPath)` | bash | Spawns a background subprocess (e.g., `sleep 3600 &`), writes both `$$` (script PID) and `$!` (grandchild PID) to a marker file (one per line) using `printf '%s\n'`, writes `"ready"` to stderr, then `wait`s. For process group signal tests (T-SIG-06). | | `write-pid-to-file(markerPath)` | ts | Writes `process.pid` to a marker file using `fs.writeFileSync`, writes `"ready"` to stderr, then runs a long-running operation (e.g., `setTimeout(() => {}, 999999)`). Used for API cancellation tests (T-API-09a, T-API-10a) where a JS/TS script is needed. | | `counter(file)` | bash | Appends "1" to a counter file each invocation, outputs count as result | **Fixture naming note:** The `emit-*` fixtures replace the previous `echo-*` fixtures. `printf` is used instead of `echo` to provide exact byte control — `echo` appends a trailing newline which can mangle exact byte expectations in parser tests. 
For tests that specifically need to verify trailing-newline handling, use `emit-raw-ln`. The `write-*-to-file` fixtures observe values via the filesystem rather than CLI stdout, since the CLI never prints `result` to its own stdout (Spec 7.1). All bash `write-*-to-file` fixtures use `printf '%s'` (not `echo`) to write values, ensuring exact-byte safety for trailing spaces, backslashes, and values starting with `-`. The `observe-env` fixture is a TS-based alternative to `write-env-to-file` that writes structured JSON (`{ "present": boolean, "value"?: string }`) to a marker file using `fs.writeFileSync` for exact-byte safety. Use `observe-env` when a test must distinguish between a variable being unset vs set to an empty string — `write-env-to-file` (bash `printf '%s' "$VAR"`) produces identical output for both cases. +**cwd-observation fixture selection.** Three fixtures observe working-directory-related state and must not be confused, because Spec 6.1 explicitly partitions them: + +- `write-cwd-to-file` (bash) — authoritative for Bash cwd assertions. Calls `/bin/pwd -P`, which reports the kernel cwd spelling via `getcwd(3)`. Do **not** use `$PWD` for cwd assertions: `$PWD` is shell-maintained, inherited unchanged from loopx, and Spec 6.1 disclaims any byte-for-byte equality between `$PWD` and the spawn cwd. +- `observe-cwd-ts` (ts) — authoritative for JS/TS cwd assertions. Writes `process.cwd()` via `fs.writeFileSync`. Node/Bun's `process.cwd()` typically canonicalizes via `getcwd(3)`, so the runtime may report a different string spelling than loopx used to spawn — directory identity (device/inode) is guaranteed; string spelling is not. +- `observe-pwd` (bash) — retained **only** for a narrow class of tests that specifically want to observe Bash's shell-managed `$PWD` variable. 
**Not used for exact-value `PWD` passthrough assertions** — per Spec 6.1 / 8.3 / 13, Bash may rewrite `$PWD` during shell startup under normal POSIX / Bash semantics, so byte-for-byte equality between Bash `$PWD` and a caller-supplied value is not a loopx guarantee. Tests that want to verify "loopx does not set `PWD`" or "`RunOptions.env`-supplied `PWD` reaches the child" (T-PWD-01 / 02 / 03 / 06) use `observe-env` with variable name `PWD` instead — that TS fixture reads `process.env.PWD` directly under Node / Bun, which preserves the inherited value byte-for-byte and bypasses Bash's shell-level rewrite. + +**Tmpdir/workflow-dir observation fixtures.** See `observe-env` (reused with `LOOPX_TMPDIR` and `LOOPX_WORKFLOW_DIR` as the variable name) for structured `{ "present": bool, "value"?: string }` marker files needed by the section 7.4 and `LOOPX_WORKFLOW_DIR` tests. A test that must both observe the env var **and** read/write files inside it (e.g., tmpdir persistence-across-iterations tests) uses `observe-env` plus a follow-up `write-value-to-file` / read pass inside the reported path. + **Signal/cancellation fixtures:** The `signal-ready-then-sleep`, `signal-trap-exit`, `signal-trap-ignore`, `spawn-grandchild`, and `write-pid-to-file` fixtures are purpose-built for signal and cancellation tests. They all follow a common protocol: (1) write PID(s) to a marker file on startup, (2) write `"ready"` to stderr, (3) block. The stderr marker allows `waitForStderr("ready")` to synchronize the test harness before sending signals, preventing races. The marker file PIDs allow post-test verification that processes were actually killed. **Bash JSON safety warning:** The `emit-result`, `emit-goto`, and `emit-result-goto` fixtures use `printf` with `%s` substitution to produce JSON. This is only safe for simple string values that do not contain double quotes (`"`), backslashes (`\`), newlines, or other JSON-special characters — these would produce malformed JSON. 
For tests that require exact-byte control, JSON-special characters in values, or arbitrary binary content, use the `stdout-writer` TS fixture (which reads a pre-written payload from disk) or `emit-raw`/`emit-raw-ln` (which output exact bytes without JSON framing). @@ -418,6 +857,8 @@ Each test is identified by a unique ID (`T-
-`), references a SP #### Help & Version - **T-CLI-01**: `loopx version` prints the bare package version string followed by a newline, exits 0. Assert exact stdout is `${version}\n` — the spec requires a trailing newline, so assert against the untrimmed stdout, not a trimmed comparison. No additional text or labels. Does not require `.loopx/` to exist. *(Spec 4.3, 5.4)* +- **T-CLI-01a**: `loopx version extra` (extra positional after `version`) → usage error, exit 1. SPEC 4.3 specifies `loopx version` as a no-argument subcommand ("Prints the installed version of loopx to stdout and exits") and SPEC 12's usage-error enumeration uses non-exhaustive language ("Usage errors (exit code 1) include: …"), with the consistent pattern that extra positionals to fixed-grammar subcommands are usage errors (SPEC 12 explicitly lists `loopx run ralph bar` as the run-side analogue). Other no-argument subcommands have extra-argument coverage (e.g., T-CLI-RUN-DASHDASH-01 for `loopx run --`); the version-subcommand surface is uncovered. Run `loopx version extra`. Assert: (a) exit code 1, (b) stderr surfaces a usage / unrecognized-argument error (the exact wording is implementation-defined; the test does not pin a specific phrase but does require the failure category to be a parser-level usage error), (c) stdout does **not** contain the version string (the version-print short-circuit must not fire when the parser rejects the invocation). A buggy implementation that ignored extra positionals on `version` and printed the version anyway would surface as a stdout containing the version string (failing assertion (c)) and exit code 0 (failing (a)). The related `loopx version --help` / `-h` grammar is covered by T-CLI-01b under the same most-natural reading; both tests are outside the ADR-0004 surface and are maintained at the SPEC 4.3 / 11 / 12 grammar layer. *(Spec 4.3, 12)* +- **T-CLI-01b**: `loopx version --help` and `loopx version -h` → usage error, exit 1. 
SPEC §11 (Help) defines exactly three help forms — top-level (11.1), run (11.2), and install (11.3) — with no "Version Help" section. SPEC 4.3's `loopx version` definition does not document a help form for the subcommand. The deliberate omission combined with the non-exhaustive usage-error enumeration in SPEC 12 makes the consistent reading "extra arguments to a no-argument subcommand are usage errors", which subsumes `--help` / `-h` as unrecognized arguments at the version-subcommand parser level. **Note:** SPEC is silent on this exact case; the test encodes the most-natural reading of SPEC 4.3 / 11 / 12 (usage error) and is outside the ADR-0004 surface — a follow-up SPEC clarification at the SPEC 4.3 / 11 layer would let this test be re-classified as a direct conformance pin instead of a grammar-derived reading. Run two variants: (a) `loopx version --help`, and (b) `loopx version -h`. For each variant, assert: (i) exit code 1, (ii) stderr surfaces a usage / unrecognized-argument error (the exact wording is implementation-defined), (iii) stdout does **not** contain the version string, and (iv) stdout does **not** contain a version-scoped help block (no "Usage: loopx version ..." or similar synopsis text — the version subcommand has no help form per SPEC §11). A buggy implementation that interpreted `--help` as a top-level help short-circuit when reached via the `version` subcommand path would print top-level help and exit 0, failing (i)–(iv). *(Spec 4.3, 11, 12)* #### Top-Level Help @@ -434,11 +875,13 @@ Each test is identified by a unique ID (`T-
-`), references a SP - **T-CLI-61**: `loopx --help run foo` shows top-level help (not run help) and exits 0. Same behavior as `loopx -h run foo` — verifies that the `--help` long form has identical precedence semantics to `-h` when followed by additional arguments. *(Spec 4.2)* - **T-CLI-90**: `loopx --help --invalid-flag` prints top-level help and exits 0 (top-level `--help` takes precedence over invalid flags). This is the `--help` long-form counterpart to T-CLI-07g (`loopx -h --invalid-flag`), verifying both help spellings suppress subsequent invalid flags identically. *(Spec 4.2)* - **T-CLI-91**: `loopx --help -e nonexistent.env` prints top-level help and exits 0 (top-level `--help` takes precedence over `-e`). The nonexistent env file is not read or validated. This is the `--help` long-form counterpart to T-CLI-07j (`loopx -h -e nonexistent.env`). *(Spec 4.2)* +- **T-CLI-TOP-DASHDASH-01**: Top-level `loopx --` rejected as an unrecognized top-level token. SPEC 4.2 / 4.3 / 12 do not list `--` as a recognized top-level flag or separator — the only top-level surfaces are help (`-h` / `--help`), the recognized subcommands (`run`, `version`, `output`, `env`, `install`), and parser-level validation. T-CLI-RUN-DASHDASH-01 covers `loopx run --` (which is a `run`-subcommand-scoped concern); this test pins down the **top-level** parser's rejection of `--` as a usage error. Run `loopx --` (no subcommand, no other arguments). Assert: (a) exit code 1 (usage error per SPEC 12), (b) stderr mentions an unrecognized argument / missing subcommand / unrecognized `--` token (the exact wording is implementation-defined; the test does not pin a specific message phrase but does require the error to surface a usage-error category), (c) no subcommand was dispatched (no version output, no help short-circuit, no install or run side effects). 
A buggy implementation that silently consumed `--` at the top-level (e.g., as a no-op separator) would proceed to a "no subcommand provided" path or a top-level-help path; both are observable distinct failure modes. *(Spec 4.2, 4.3, 12)* #### Run Help - **T-CLI-40**: `loopx run -h` with `.loopx/` containing workflows prints run-specific help that includes: (a) run syntax showing the target form (e.g., `loopx run [options] [: