From 435e7aeff44e66f20b04a0d43e427cee3c8c0dbe Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 15:55:19 +0200
Subject: [PATCH 01/38] Add Docker live run-artifact regression test.

Protect the host-mounted .jaiph/runs contract by asserting Docker-backed runs create and grow step .out/.err files before the workflow exits.

Made-with: Cursor
---
 e2e/tests/75_docker_live_step_output.sh | 112 ++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 e2e/tests/75_docker_live_step_output.sh
diff --git a/e2e/tests/75_docker_live_step_output.sh b/e2e/tests/75_docker_live_step_output.sh
new file mode 100644
index 00000000..ee76f709
--- /dev/null
+++ b/e2e/tests/75_docker_live_step_output.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "${ROOT_DIR}/e2e/lib/common.sh"
+trap e2e::cleanup EXIT
+
+e2e::prepare_test_env "docker_live_step_output"
+TEST_DIR="${JAIPH_E2E_TEST_DIR}"
+
+# Gate on Docker availability — skip gracefully when Docker is not installed.
+if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
+  e2e::section "docker live step output (skipped — Docker unavailable)"
+  e2e::skip "Docker is not available, skipping Docker live artifact test"
+  exit 0
+fi
+
+e2e::section "docker step .out/.err files grow live during execution"
+
+e2e::file "live_out_docker.jh" <<'WORKFLOW'
+script slow_writer_impl = ```
+echo "line-1"
+echo "err-1" >&2
+sleep 1
+echo "line-2"
+echo "err-2" >&2
+sleep 1
+echo "line-3"
+echo "err-3" >&2
+```
+rule slow_writer() {
+  run slow_writer_impl()
+}
+
+workflow default() {
+  ensure slow_writer()
+}
+WORKFLOW
+
+run_err="$(mktemp)"
+JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/live_out_docker.jh" 2>"${run_err}" &
+run_pid=$!
+
+out_file=""
+err_file=""
+for _ in $(seq 1 50); do
+  sleep 0.1
+  shopt -s nullglob
+  out_candidates=( "${TEST_DIR}/.jaiph/runs/"*/*"live_out_docker.jh/"*slow_writer_impl.out )
+  err_candidates=( "${TEST_DIR}/.jaiph/runs/"*/*"live_out_docker.jh/"*slow_writer_impl.err )
+  shopt -u nullglob
+  if [[ ${#out_candidates[@]} -ge 1 && ${#err_candidates[@]} -ge 1 ]]; then
+    out_file="${out_candidates[0]}"
+    err_file="${err_candidates[0]}"
+    break
+  fi
+done
+
+sleep 1
+if ! kill -0 "$run_pid" 2>/dev/null; then
+  e2e::fail "docker run finished before live sample; increase slow_writer duration"
+fi
+
+mid_out_size=""
+mid_err_size=""
+if [[ -n "$out_file" && -f "$out_file" ]]; then
+  mid_out_size="$(wc -c < "$out_file")"
+fi
+if [[ -n "$err_file" && -f "$err_file" ]]; then
+  mid_err_size="$(wc -c < "$err_file")"
+fi
+
+wait "$run_pid" || true
+rm -f "${run_err}"
+
+if [[ -z "$out_file" || -z "$err_file" ]]; then
+  e2e::fail "docker out/err files never appeared during execution"
+fi
+
+if [[ -z "$mid_out_size" || "$mid_out_size" -eq 0 ]]; then
+  e2e::fail "docker out file was empty when sampled mid-execution (mid_out_size=${mid_out_size:-<empty>})"
+fi
+if [[ -z "$mid_err_size" || "$mid_err_size" -eq 0 ]]; then
+  e2e::fail "docker err file was empty when sampled mid-execution (mid_err_size=${mid_err_size:-<empty>})"
+fi
+
+final_out_size="$(wc -c < "$out_file")"
+if [[ "$final_out_size" -gt "$mid_out_size" ]]; then
+  e2e::pass "docker out file grew live: mid=${mid_out_size}B final=${final_out_size}B"
+elif [[ "$final_out_size" -eq "$mid_out_size" && "$final_out_size" -gt 0 ]]; then
+  e2e::pass "docker out file was live-written (sampled ${mid_out_size}B, final ${final_out_size}B)"
+else
+  e2e::fail "docker out file did not grow (mid=${mid_out_size}B final=${final_out_size}B)"
+fi
+
+final_err_size="$(wc -c < "$err_file")"
+if [[ "$final_err_size" -gt "$mid_err_size" ]]; then
+  e2e::pass "docker err file grew live: mid=${mid_err_size}B final=${final_err_size}B"
+elif [[ "$final_err_size" -eq "$mid_err_size" && "$final_err_size" -gt 0 ]]; then
+  e2e::pass "docker err file was live-written (sampled ${mid_err_size}B, final ${final_err_size}B)"
+else
+  e2e::fail "docker err file did not grow (mid=${mid_err_size}B final=${final_err_size}B)"
+fi
+
+final_content="$(<"$out_file")"
+expected_out="$(printf 'line-1\nline-2\nline-3')"
+e2e::assert_equals "${final_content}" "${expected_out}" "docker final .out content"
+
+final_err_content="$(<"$err_file")"
+expected_err="$(printf 'err-1\nerr-2\nerr-3')"
+e2e::assert_equals "${final_err_content}" "${expected_err}" "docker final .err content"

From 81e9aa3904e5a02ee366378d9dd03237dc50c278 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 20:38:27 +0200
Subject: [PATCH 02/38] Fix explicit nested managed calls in Docker runs.

Keep nested run/ensure calls explicit across validation, formatting, and runtime execution, and make Docker use the local Jaiph package with a writable workspace fallback so container behavior matches local runs.

Made-with: Cursor
---
 .jaiph/architect_review.jh                   |   2 +-
 QUEUE.md                                     |  90 +++++++++++++-
 src/cli/commands/run.ts                      |   4 +-
 src/format/emit.ts                           |  18 +++
 src/runtime/docker.test.ts                   |  26 ++--
 src/runtime/docker.ts                        | 123 +++++++++++++++++--
 src/runtime/kernel/node-workflow-runtime.ts  | 108 +++++++++++++++-
 src/transpile/validate-managed-calls.test.ts |  23 ++++
 src/transpile/validate.ts                    |  60 +++++++++
 test/sample-build.test.ts                    |  34 +++++
 10 files changed, 457 insertions(+), 31 deletions(-)

diff --git a/.jaiph/architect_review.jh b/.jaiph/architect_review.jh
index a85f59e0..22fa919b 100755
--- a/.jaiph/architect_review.jh
+++ b/.jaiph/architect_review.jh
@@ -102,7 +102,7 @@ workflow review_one_header(header) {
     const verdict = run first_line_str(packed)
     const updated_description = run rest_lines_str(packed)
     const body_file = run jaiph_review_body_file()
-    run mkdir_p_simple(run, jaiph_tmp_dir())
+    run mkdir_p_simple(run jaiph_tmp_dir())
     run str_equals(verdict, "dev-ready") catch (err) {
       run arg_nonempty(updated_description) catch (err) {
         fail "needs-work requires a non-empty updated_description (questions for the author)."
diff --git a/QUEUE.md b/QUEUE.md
index 5e70340b..7f3cd348 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,6 +12,93 @@ Process rules:
 
 ***
 
+## Runtime/DSL — require explicit managed calls inside nested argument lists #dev-ready
+
+**Goal**
+Keep managed execution explicit everywhere, including inside argument lists. Nested argument-position managed calls are valid **only** when they use the correct keyword:
+
+* `run foo(run bar())`
+* `run foo(ensure rule_bar())`
+* `run foo(run \`echo "aaa"\`())`
+
+The bare call-like forms must stay invalid:
+
+* `run foo(bar())`
+* `run foo(rule_bar())`
+* `run foo(\`echo "aaa"\`())`
+* `const x = bar()`
+
+The explicit capture-then-pass form is also valid:
+
+* `const x = run bar()`
+* `run foo(x)`
+
+This is a deliberate language rule: scripts/workflows are only executable via `run`, and rules are only executable via `ensure`, even when nested inside another call's args.
+
+**Why this task exists**
+The runtime bug that treated `run foo(run bar())` as two literal argv tokens (`"run"`, `"bar()"`) proves the language intent is not being enforced correctly. The fix is to support **explicit** nested managed calls and reject **implicit** bare call-like execution.
+
+**Context**
+
+* Parser / call-arg parsing: `src/parse/core.ts`, `src/parse/workflows.ts`, `src/parse/steps.ts`
+* Validation: `src/transpile/validate.ts`, `src/transpile/validate-managed-calls.test.ts`
+* Runtime arg handling: `src/runtime/kernel/node-workflow-runtime.ts`
+* Formatting / round-trip emit: `src/format/emit.ts`
+* Real motivating workflow: `.jaiph/architect_review.jh`
+
+**Implementation requirements**
+
+1. **Language rule**
+   * Managed steps remain explicit everywhere.
+   * Inside call arguments, nested execution is allowed only via:
+     * `run ref(...)`
+     * `ensure ref(...)`
+     * `run \`...\`()`
+   * Bare nested call-like forms are invalid and must not be interpreted as executable.
+
+2. **Compiler behavior**
+   * Accept explicit nested managed calls in arg position.
+   * Reject bare nested call-like syntax in arg position with a clear parse/validate error.
+   * Reject bare call-like execution in `const` assignments as well.
+   * Error message must tell the user to add the missing `run` / `ensure` keyword, not to rely on implicit execution.
+
+3. **Runtime behavior**
+   * Runtime arg evaluation must execute explicit nested managed calls correctly and pass their resulting value as a single argument.
+   * Runtime must never interpret a bare call-like token as executable work.
+
+4. **Formatter / emitter**
+   * Ensure formatting round-trips valid nested explicit forms like `run foo(run bar())`.
+   * Ensure invalid bare nested call-like forms are not generated by the emitter.
+
+5. **Workflow update**
+   * Keep or update `.jaiph/architect_review.jh` to use the explicit nested form:
+     * `run mkdir_p_simple(run jaiph_tmp_dir())`
+
+6. **Tests**
+   * Add/keep a regression test proving `run mkdir_p_simple(jaiph_tmp_dir())` is rejected.
+   * Add/keep a regression test proving `run foo(rule_bar())` is rejected.
+   * Add/keep a regression test proving `run foo(\`echo "aaa"\`())` is rejected.
+   * Add/keep a regression test proving `const x = bar()` is rejected.
+   * Add/keep a positive test for `run mkdir_p_simple(run jaiph_tmp_dir())`.
+   * Add/keep a positive test for `run foo(ensure rule_bar())`.
+   * Add/keep a positive test for nested inline script form `run foo(run \`echo "aaa"\`())`.
+   * Add/keep a positive test for `const x = run bar()` followed by `run foo(x)`.
+
+**Acceptance criteria**
+
+* `run foo(run bar())` succeeds and passes the nested result as one argument.
+* `run foo(ensure rule_bar())` succeeds and passes the nested result as one argument.
+* `run foo(run \`echo "aaa"\`())` succeeds and passes the nested result as one argument.
+* `run foo(bar())` fails at compile time with an actionable error.
+* `run foo(rule_bar())` fails at compile time with an actionable error.
+* `run foo(\`echo "aaa"\`())` fails at compile time with an actionable error.
+* `const x = run bar()` is accepted.
+* `const x = bar()` fails at compile time with an actionable error.
+* `.jaiph/architect_review.jh` uses the explicit nested form.
+* Tests prevent regression back to implicit bare nested execution.
+
+***
+
 ## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR
 
 **Goal**
@@ -232,4 +319,5 @@ Extend the language with workflow parameters: `workflow analyze(file: string, de
 * Errors produce `isError: true` responses (no server crash).
 * E2E test passes.
 
-***
\ No newline at end of file
+***
+
diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts
index 570f02d5..1a87ceab 100644
--- a/src/cli/commands/run.ts
+++ b/src/cli/commands/run.ts
@@ -381,7 +381,9 @@ function reportResult(
     }
   }
   const runtimeDebugEnabled = runtimeEnv.JAIPH_DEBUG === "true";
-  const runtimeErrorPrinted = hasFatalRuntimeStderr(capturedStderr, runtimeDebugEnabled);
+  const runtimeErrorPrinted = sandboxRunDir
+    ? false
+    : hasFatalRuntimeStderr(capturedStderr, runtimeDebugEnabled);
   const resolvedStatus = exitStatus !== 0 || runtimeErrorPrinted ? 1 : 0;
 
   emitter.emit("workflow_end", {
diff --git a/src/format/emit.ts b/src/format/emit.ts
index 30a9e6c4..bd7c7e08 100644
--- a/src/format/emit.ts
+++ b/src/format/emit.ts
@@ -13,6 +13,7 @@ import type {
   WorkflowMetadata,
   TopLevelEmitOrder,
 } from "../types";
+import { parseCallRef } from "../parse/core";
 
 export interface EmitOptions {
   indent: number;
@@ -378,6 +379,23 @@ function formatArgs(args: string, bareIdentifierArgs?: string[]): string {
   while (i < args.length) {
     while (i < args.length && (args[i] === " " || args[i] === "\t")) i++;
     if (i >= args.length) break;
+    const tail = args.slice(i);
+    const keyword = tail.startsWith("run ")
+      ? "run"
+      : tail.startsWith("ensure ")
+        ? "ensure"
+        : null;
+    if (keyword) {
+      const afterKeyword = args.slice(i + keyword.length).trimStart();
+      const skipped = args.slice(i + keyword.length).length - afterKeyword.length;
+      const call = parseCallRef(afterKeyword);
+      if (call && (call.rest.length === 0 || /^\s/.test(call.rest))) {
+        const consumed = afterKeyword.length - call.rest.length;
+        tokens.push(`${keyword} ${call.ref}(${formatArgs(call.args ?? "", call.bareIdentifierArgs)})`);
+        i += keyword.length + skipped + consumed;
+        continue;
+      }
+    }
     if (args[i] === '"') {
       let j = i + 1;
       while (j < args.length && !(args[j] === '"' && args[j - 1] !== "\\")) j++;
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index ce422847..9afbe728 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -207,7 +207,7 @@ test("resolveDockerConfig: workspace from in-file", () => {
 // buildDockerArgs
 // ---------------------------------------------------------------------------
 
-test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device", () => {
+test("buildDockerArgs: workspace-ro + sandbox run rw + fuse device", () => {
   const opts = defaultOpts({ runArgs: ["arg1"] });
   const args = buildDockerArgs(opts, TEST_OVERLAY);
 
@@ -223,15 +223,11 @@ test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device"
 
   const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v");
 
-  // Workspace ro
-  const wsMount = vFlags.find((v) => v.includes("/jaiph/workspace:"));
-  assert.ok(wsMount, "workspace mount present");
-  assert.ok(wsMount!.endsWith(":ro"), "workspace must be ro");
-
   // Overlay lower-layer ro
   const wsRoMount = vFlags.find((v) => v.includes("/jaiph/workspace-ro:"));
   assert.ok(wsRoMount, "workspace-ro mount present");
   assert.ok(wsRoMount!.endsWith(":ro"), "workspace-ro must be ro");
+  assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace:")), "workspace mount must stay writable inside image");
 
   // Sandbox run dir rw
   const runMount = vFlags.find((v) => v.includes("/jaiph/run:"));
@@ -243,8 +239,8 @@ test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device"
   assert.ok(overlayMount, "overlay script mount present");
   assert.ok(overlayMount!.endsWith(":ro"), "overlay script must be ro");
 
-  // Total: 2 workspace (primary + -ro) + 1 run + 1 overlay script = 4
-  assert.equal(vFlags.length, 4);
+  // Total: 1 workspace-ro + 1 run + 1 overlay script = 3
+  assert.equal(vFlags.length, 3);
 
   // Command: overlay-run.sh → jaiph run --raw <source>
   assert.ok(args.includes("/jaiph/overlay-run.sh"));
@@ -290,7 +286,7 @@ test("buildDockerArgs: overrides JAIPH_WORKSPACE and JAIPH_RUNS_DIR", () => {
   assert.ok(!args.some((a) => a === "JAIPH_RUNS_DIR=/host/runs"));
 });
 
-test("buildDockerArgs: multiple workspace mounts all forced ro", () => {
+test("buildDockerArgs: multiple workspace mounts only lower-layer paths are mounted ro", () => {
   const opts = defaultOpts({
     config: {
       ...defaultOpts().config,
@@ -302,11 +298,11 @@ test("buildDockerArgs: multiple workspace mounts all forced ro", () => {
   });
   const args = buildDockerArgs(opts, TEST_OVERLAY);
   const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v");
-  // 2 configured × 2 (primary + -ro) + 1 run + 1 overlay script = 6
-  assert.equal(vFlags.length, 6);
-  assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace:") && v.endsWith(":ro")));
+  // 2 configured lower-layer mounts + 1 run + 1 overlay script = 4
+  assert.equal(vFlags.length, 4);
+  assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace:") && v.endsWith(":ro")));
   assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace-ro:") && v.endsWith(":ro")));
-  assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace/config:") && v.endsWith(":ro")));
+  assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace/config:") && v.endsWith(":ro")));
   assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace-ro/config:") && v.endsWith(":ro")));
 });
 
@@ -413,6 +409,10 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     const content = readFileSync(scriptPath, "utf8");
     assert.ok(content.startsWith("#!/usr/bin/env bash"));
     assert.ok(content.includes("fuse-overlayfs"));
+    assert.ok(content.includes("workspace overlay unavailable"));
+    assert.ok(content.includes("using copy fallback"));
+    assert.ok(content.includes('rsync -a --delete "$LOWER"/ "$MERGED"/'));
+    assert.ok(content.includes("mktemp \"$MERGED/.jaiph-overlay-probe.XXXXXX\""));
     assert.ok(content.includes('exec "$@"'));
   } finally {
     rmSync(dirname(scriptPath), { recursive: true, force: true });
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index be76a754..3382c1a3 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -201,10 +201,14 @@ function installedPackageRoot(): string {
 function autoRuntimeImageTag(baseImage: string, packageRoot: string): string {
   const packageJsonPath = join(packageRoot, "package.json");
   const cliPath = join(packageRoot, "dist", "src", "cli.js");
+  const dockerRuntimePath = join(packageRoot, "dist", "src", "runtime", "docker.js");
+  const nodeWorkflowRuntimePath = join(packageRoot, "dist", "src", "runtime", "kernel", "node-workflow-runtime.js");
   const packageStamp = existsSync(packageJsonPath) ? statSync(packageJsonPath).mtimeMs : 0;
   const cliStamp = existsSync(cliPath) ? statSync(cliPath).mtimeMs : 0;
+  const dockerRuntimeStamp = existsSync(dockerRuntimePath) ? statSync(dockerRuntimePath).mtimeMs : 0;
+  const nodeWorkflowRuntimeStamp = existsSync(nodeWorkflowRuntimePath) ? statSync(nodeWorkflowRuntimePath).mtimeMs : 0;
   const digest = createHash("sha256")
-    .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}`)
+    .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}|${dockerRuntimeStamp}|${nodeWorkflowRuntimeStamp}`)
     .digest("hex")
     .slice(0, 12);
   return `${AUTO_RUNTIME_IMAGE_REPO}:${digest}`;
@@ -223,6 +227,40 @@ function imageHasJaiph(image: string): boolean {
   }
 }
 
+function imageConfiguredUser(image: string): string | undefined {
+  try {
+    const raw = execFileSync(
+      "docker",
+      ["image", "inspect", image, "--format", "{{json .Config.User}}"],
+      { encoding: "utf8", timeout: 30_000 },
+    ).trim();
+    const parsed = JSON.parse(raw) as string;
+    return parsed.length > 0 ? parsed : undefined;
+  } catch {
+    return undefined;
+  }
+}
+
+function imageHomeDir(image: string): string | undefined {
+  try {
+    const raw = execFileSync(
+      "docker",
+      ["image", "inspect", image, "--format", "{{json .Config.Env}}"],
+      { encoding: "utf8", timeout: 30_000 },
+    ).trim();
+    const envList = JSON.parse(raw) as string[];
+    for (const entry of envList) {
+      if (entry.startsWith("HOME=")) {
+        const value = entry.slice("HOME=".length);
+        return value.length > 0 ? value : undefined;
+      }
+    }
+  } catch {
+    // Fall through.
+  }
+  return undefined;
+}
+
 function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: string, tag: string): string {
   const contextDir = mkdtempSync(join(tmpdir(), "jaiph-runtime-image-"));
   try {
@@ -234,12 +272,19 @@ function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: strin
     if (!tarballName) {
       throw new Error("npm pack produced no tarball");
     }
+    const originalUser = imageConfiguredUser(baseImage);
+    const originalHome = imageHomeDir(baseImage);
     writeFileSync(
       join(contextDir, "Dockerfile"),
       [
         `FROM ${baseImage}`,
+        `USER root`,
         `COPY ${tarballName} /tmp/${tarballName}`,
-        `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}`,
+        `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}` +
+        (originalHome
+          ? ` && JAIPH_NPM_BIN="$(npm prefix -g)/bin/jaiph" && mkdir -p ${originalHome}/.local/bin && ln -sf "$JAIPH_NPM_BIN" ${originalHome}/.local/bin/jaiph`
+          : ""),
+        ...(originalUser ? [`USER ${originalUser}`] : []),
         "",
       ].join("\n"),
     );
@@ -255,6 +300,18 @@ function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: strin
   }
 }
 
+function ensureLocalRuntimeImage(baseImage: string): string {
+  pullImageIfNeeded(baseImage);
+  const packageRoot = installedPackageRoot();
+  const tag = autoRuntimeImageTag(baseImage, packageRoot);
+  try {
+    execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 });
+    return tag;
+  } catch {
+    return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag);
+  }
+}
+
 function ensureImageHasJaiph(baseImage: string): string {
   pullImageIfNeeded(baseImage);
   if (imageHasJaiph(baseImage)) {
@@ -285,6 +342,7 @@ export function resolveImage(config: DockerRunConfig, workspaceRoot: string): st
     if (existsSync(dockerfilePath)) {
       baseImage = buildImageFromDockerfile(dockerfilePath);
     }
+    return ensureLocalRuntimeImage(baseImage);
   }
   return ensureImageHasJaiph(baseImage);
 }
@@ -299,9 +357,51 @@ LOWER=/jaiph/workspace-ro
 UPPER=/tmp/overlay-upper
 WORK=/tmp/overlay-work
 MERGED=/jaiph/workspace
-mkdir -p "$UPPER" "$WORK"
+mkdir -p "$UPPER" "$WORK" "$MERGED"
+overlay_ok=0
+overlay_reason=""
 if command -v fuse-overlayfs >/dev/null 2>&1 && [ -e /dev/fuse ]; then
-  fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/dev/null || true
+  if fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
+    probe_path="$(mktemp "$MERGED/.jaiph-overlay-probe.XXXXXX" 2>/dev/null || true)"
+    if [ -n "$probe_path" ]; then
+      rm -f "$probe_path"
+      overlay_ok=1
+    else
+      overlay_reason="fuse-overlayfs mounted but workspace is still not writable"
+    fi
+  else
+    overlay_reason="$(tr '\n' ' ' </tmp/jaiph-fuse-overlay.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
+  fi
+else
+  overlay_reason="fuse-overlayfs unavailable or /dev/fuse missing"
+fi
+if [ "$overlay_ok" -ne 1 ]; then
+  if command -v rsync >/dev/null 2>&1; then
+    if rsync -a --delete "$LOWER"/ "$MERGED"/ 2>/tmp/jaiph-workspace-copy.err; then
+      printf 'jaiph docker: workspace overlay unavailable; using copy fallback at /jaiph/workspace' >&2
+      if [ -n "$overlay_reason" ]; then
+        printf ' (%s)' "$overlay_reason" >&2
+      fi
+      printf '\n' >&2
+      overlay_ok=1
+    else
+      copy_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-copy.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
+      printf 'jaiph docker: workspace overlay unavailable and copy fallback failed; /jaiph/workspace may be incomplete' >&2
+      if [ -n "$overlay_reason" ]; then
+        printf ' (%s)' "$overlay_reason" >&2
+      fi
+      if [ -n "$copy_reason" ]; then
+        printf ' [copy fallback: %s]' "$copy_reason" >&2
+      fi
+      printf '\n' >&2
+    fi
+  else
+    printf 'jaiph docker: workspace overlay unavailable and rsync copy fallback is unavailable; /jaiph/workspace may be incomplete' >&2
+    if [ -n "$overlay_reason" ]; then
+      printf ' (%s)' "$overlay_reason" >&2
+    fi
+    printf '\n' >&2
+  fi
 fi
 exec "$@"
 `;
@@ -386,13 +486,13 @@ export function overlayMountPath(containerPath: string): string {
  * Build the `docker run --rm` argument list.
  *
  * Mounts:
- *  1. workspace → /jaiph/workspace:ro  (fallback when overlay absent)
- *  2. workspace → /jaiph/workspace-ro:ro  (overlay lower layer)
- *  3. sandboxRunDir → /jaiph/run:rw  (single run artifacts)
+ *  1. workspace → /jaiph/workspace-ro:ro  (overlay lower layer / copy source)
+ *  2. sandboxRunDir → /jaiph/run:rw       (single run artifacts)
  *
- * overlay-run.sh (baked in image) creates a fuse-overlayfs CoW at
- * /jaiph/workspace using -ro as lower.  /jaiph/run is outside the overlay
- * so writes go directly to the host mount — no symlink needed.
+ * The image already contains a writable `/jaiph/workspace` directory.
+ * `overlay-run.sh` mounts `fuse-overlayfs` there when available; otherwise it
+ * copies the lower layer into that directory as a writable fallback. `/jaiph/run`
+ * is outside the overlay, so run artifacts still persist to the host mount.
  *
  * The container runs `jaiph run --raw <file>` using its own installed jaiph.
  */
@@ -415,10 +515,9 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str
     args.push("--network", opts.config.network);
   }
 
-  // Workspace: ro at primary path (fallback) + overlay lower layer path
+  // Workspace inputs: mounted only at the overlay lower-layer path.
   for (const mount of opts.config.mounts) {
     const hostAbs = resolve(opts.workspaceRoot, mount.hostPath);
-    args.push("-v", `${hostAbs}:${mount.containerPath}:ro`);
     args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`);
   }
 
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index abc01561..c6ee76ad 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -12,6 +12,7 @@ import { buildStepDisplayParamPairs } from "../../cli/commands/format-params.js"
 import { resolveRuleRef, resolveScriptRef, resolveWorkflowRef, type RuntimeGraph } from "./graph";
 import type { WorkflowMetadata } from "../../types";
 import { extractJson, validateFields } from "./schema";
+import { parseCallRef } from "../../parse/core";
 import {
   plainMultilineOrchestrationForRuntime,
   tripleQuotedRawForRuntime,
@@ -159,6 +160,79 @@ function parseArgsRaw(raw: string, vars: Map<string, string>, env?: NodeJS.Proce
   return out;
 }
 
+type ParsedArgToken =
+  | { kind: "literal"; value: string }
+  | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string };
+
+function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken; next: number } | null {
+  const tail = raw.slice(start);
+  const keyword = tail.startsWith("run ")
+    ? "run"
+    : tail.startsWith("ensure ")
+      ? "ensure"
+      : null;
+  if (!keyword) return null;
+  const afterKeyword = raw.slice(start + keyword.length).trimStart();
+  const skipped = raw.slice(start + keyword.length).length - afterKeyword.length;
+  const call = parseCallRef(afterKeyword);
+  if (!call) return null;
+  if (call.rest.length > 0 && !/^\s/.test(call.rest)) return null;
+  const consumed = afterKeyword.length - call.rest.length;
+  return {
+    token: {
+      kind: "managed",
+      managedKind: keyword,
+      ref: call.ref,
+      argsRaw: call.args ?? "",
+    },
+    next: start + keyword.length + skipped + consumed,
+  };
+}
+
+function parseArgTokens(raw: string): ParsedArgToken[] {
+  if (!raw.trim()) return [];
+  const out: ParsedArgToken[] = [];
+  let i = 0;
+  while (i < raw.length) {
+    while (i < raw.length && /\s/.test(raw[i]!)) i += 1;
+    if (i >= raw.length) break;
+    const managed = parseManagedArgAt(raw, i);
+    if (managed) {
+      out.push(managed.token);
+      i = managed.next;
+      continue;
+    }
+    let cur = "";
+    let quote: "'" | '"' | null = null;
+    while (i < raw.length) {
+      const ch = raw[i]!;
+      if (quote) {
+        if (ch === quote) {
+          quote = null;
+        } else {
+          cur += ch;
+        }
+        i += 1;
+        continue;
+      }
+      if (ch === "'" || ch === '"') {
+        quote = ch;
+        i += 1;
+        continue;
+      }
+      if (/\s/.test(ch)) {
+        break;
+      }
+      cur += ch;
+      i += 1;
+    }
+    if (cur.length > 0) {
+      out.push({ kind: "literal", value: cur });
+    }
+  }
+  return out;
+}
+
 function stripOuterQuotes(value: string): string {
   if (value.length >= 2) {
     const first = value[0];
@@ -1223,8 +1297,32 @@ export class NodeWorkflowRuntime {
     return `${filePath}::${name}`;
   }
 
+  private async resolveArgsRaw(scope: Scope, raw: string | string[]): Promise<string[] | StepResult> {
+    if (Array.isArray(raw)) {
+      return raw;
+    }
+    const tokens = parseArgTokens(raw);
+    const resolved: string[] = [];
+    for (const token of tokens) {
+      if (token.kind === "literal") {
+        resolved.push(interpolate(token.value, scope.vars, scope.env));
+        continue;
+      }
+      const result = token.managedKind === "run"
+        ? await this.executeRunRef(scope, token.ref, token.argsRaw)
+        : await this.executeEnsureRef(scope, token.ref, token.argsRaw, undefined);
+      if (result.status !== 0) {
+        return result;
+      }
+      resolved.push(result.returnValue ?? result.output.trim());
+    }
+    return resolved;
+  }
+
   private async executeRunRef(scope: Scope, ref: string, argsRaw: string | string[]): Promise<StepResult> {
-    const args = Array.isArray(argsRaw) ? argsRaw : parseArgsRaw(argsRaw, scope.vars, scope.env);
+    const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw);
+    if (!Array.isArray(resolvedArgs)) return resolvedArgs;
+    const args = resolvedArgs;
     const resolvedWorkflow = resolveWorkflowRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } });
     if (resolvedWorkflow) {
       const mk = this.mockKey(resolvedWorkflow.filePath, resolvedWorkflow.workflow.name);
@@ -1263,7 +1361,9 @@ export class NodeWorkflowRuntime {
     argsRaw: string,
     recover: EnsureRecover | undefined,
   ): Promise<StepResult> {
-    const args = parseArgsRaw(argsRaw, scope.vars, scope.env);
+    const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw);
+    if (!Array.isArray(resolvedArgs)) return resolvedArgs;
+    const args = resolvedArgs;
     const attempt = async (): Promise<StepResult> => {
       const resolvedRule = resolveRuleRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } });
       if (!resolvedRule) return { status: 1, output: "", error: `Unknown ensure target: ${ref}` };
@@ -1353,7 +1453,9 @@ export class NodeWorkflowRuntime {
     shebang: string | undefined,
     argsRaw: string,
   ): Promise<StepResult> {
-    const args = parseArgsRaw(argsRaw, scope.vars, scope.env);
+    const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw);
+    if (!Array.isArray(resolvedArgs)) return resolvedArgs;
+    const args = resolvedArgs;
     const scriptName = inlineScriptName(body, shebang);
     return this.executeManagedStep(
       "script",
diff --git a/src/transpile/validate-managed-calls.test.ts b/src/transpile/validate-managed-calls.test.ts
index 828a4890..d5bb238b 100644
--- a/src/transpile/validate-managed-calls.test.ts
+++ b/src/transpile/validate-managed-calls.test.ts
@@ -184,6 +184,29 @@ test("bare identifier arg: unknown name fails E_VALIDATE", () => {
   }
 });
 
+test("E_VALIDATE: nested call-like arg requires explicit run or ensure", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-call-"));
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script mkdir_p_simple = `mkdir -p "$1"`',
+        'script jaiph_tmp_dir = `printf "%s\\n" "$JAIPH_WORKSPACE/.jaiph/tmp"`',
+        "workflow default() {",
+        "  run mkdir_p_simple(jaiph_tmp_dir())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    assert.throws(
+      () => buildScripts(join(root, "m.jh"), join(root, "out")),
+      /nested managed calls in argument position must be explicit/,
+    );
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
 test("bare identifier arg: capture variable passes validation", () => {
   const root = mkdtempSync(join(tmpdir(), "jaiph-val-bare-cap-"));
   const out = join(root, "out");
diff --git a/src/transpile/validate.ts b/src/transpile/validate.ts
index edb2b747..5b3d18ba 100644
--- a/src/transpile/validate.ts
+++ b/src/transpile/validate.ts
@@ -257,6 +257,53 @@ function validateBareIdentifierArgs(
   }
 }
 
+function stripQuotedArgContent(args: string): string {
+  let out = "";
+  let quote: "'" | '"' | null = null;
+  for (let i = 0; i < args.length; i += 1) {
+    const ch = args[i]!;
+    if (quote) {
+      if (ch === quote && args[i - 1] !== "\\") {
+        quote = null;
+      }
+      out += " ";
+      continue;
+    }
+    if (ch === "'" || ch === '"') {
+      quote = ch;
+      out += " ";
+      continue;
+    }
+    out += ch;
+  }
+  return out;
+}
+
+function validateNestedManagedCallArgs(
+  filePath: string,
+  loc: { line: number; col: number },
+  args: string | undefined,
+): void {
+  if (!args) return;
+  const stripped = stripQuotedArgContent(args);
+  const re = /\b([A-Za-z_][A-Za-z0-9_.]*)\s*\(/g;
+  let match: RegExpExecArray | null;
+  while ((match = re.exec(stripped)) !== null) {
+    const before = stripped.slice(0, match.index).trimEnd();
+    const lastToken = before.length === 0 ? "" : before.slice(before.lastIndexOf(" ") + 1);
+    if (lastToken === "run" || lastToken === "ensure") {
+      continue;
+    }
+    throw jaiphError(
+      filePath,
+      loc.line,
+      loc.col,
+      "E_VALIDATE",
+      `nested managed calls in argument position must be explicit; use "run ${match[1]}(...)" or "ensure ${match[1]}(...)" inside the argument list`,
+    );
+  }
+}
+
 /** Resolve a route target workflow ref to its declared parameter count. Returns undefined if unresolvable. */
 function resolveRouteTargetParams(
   ref: string,
@@ -474,6 +521,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
       }
       if (s.type === "ensure") {
         validateNoShellRedirection(ast.filePath, s.ref.loc, "ensure", s.args);
+        validateNestedManagedCallArgs(ast.filePath, s.ref.loc, s.args);
         validateRef(s.ref, ast, refCtx, expectRuleRef);
         validateArity(ast.filePath, s.ref.loc, s.ref.value, s.args, "rule", ast, refCtx);
 
@@ -488,6 +536,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
       }
       if (s.type === "run") {
         validateNoShellRedirection(ast.filePath, s.workflow.loc, "run", s.args);
+        validateNestedManagedCallArgs(ast.filePath, s.workflow.loc, s.args);
         if (s.async) {
           throw jaiphError(
             ast.filePath,
@@ -572,12 +621,14 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
         if (s.managed) {
           if (s.managed.kind === "run") {
             validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "run", s.managed.args);
+            validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args);
             validateRef(s.managed.ref, ast, refCtx, expectRunInRuleRef);
             validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "workflow", ast, refCtx);
 
             validateBareIdentifierArgs(ast.filePath, s.managed.ref.loc, s.managed.bareIdentifierArgs, ruleKnownVars);
           } else if (s.managed.kind === "ensure") {
             validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "ensure", s.managed.args);
+            validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args);
             validateRef(s.managed.ref, ast, refCtx, expectRuleRef);
             validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "rule", ast, refCtx);
 
@@ -610,6 +661,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
         const v = s.value;
         if (v.kind === "run_capture") {
           validateNoShellRedirection(ast.filePath, v.ref.loc, "run", v.args);
+          validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args);
           if (!v.ref.value.includes(".") && ruleKnownVars.has(v.ref.value) && !localScripts.has(v.ref.value)) {
             throw jaiphError(ast.filePath, v.ref.loc.line, v.ref.loc.col, "E_VALIDATE", `strings are not executable; "${v.ref.value}" is a string — use a script instead`);
           }
@@ -619,6 +671,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
           validateBareIdentifierArgs(ast.filePath, v.ref.loc, v.bareIdentifierArgs, ruleKnownVars);
         } else if (v.kind === "ensure_capture") {
           validateNoShellRedirection(ast.filePath, v.ref.loc, "ensure", v.args);
+          validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args);
           validateRef(v.ref, ast, refCtx, expectRuleRef);
           validateArity(ast.filePath, v.ref.loc, v.ref.value, v.args, "rule", ast, refCtx);
 
@@ -765,6 +818,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
         validateChannelRef(s.channel, s.loc);
         if (s.rhs.kind === "run") {
           validateNoShellRedirection(ast.filePath, s.rhs.ref.loc, "run", s.rhs.args);
+          validateNestedManagedCallArgs(ast.filePath, s.rhs.ref.loc, s.rhs.args);
           validateRef(s.rhs.ref, ast, refCtx, expectRunTargetRef);
           validateArity(ast.filePath, s.rhs.ref.loc, s.rhs.ref.value, s.rhs.args, "workflow", ast, refCtx);
 
@@ -799,6 +853,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
       }
       if (s.type === "ensure") {
         validateNoShellRedirection(ast.filePath, s.ref.loc, "ensure", s.args);
+        validateNestedManagedCallArgs(ast.filePath, s.ref.loc, s.args);
         validateRef(s.ref, ast, refCtx, expectRuleRef);
         validateArity(ast.filePath, s.ref.loc, s.ref.value, s.args, "rule", ast, refCtx);
 
@@ -813,6 +868,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
       }
       if (s.type === "run") {
         validateNoShellRedirection(ast.filePath, s.workflow.loc, "run", s.args);
+        validateNestedManagedCallArgs(ast.filePath, s.workflow.loc, s.args);
         if (!s.workflow.value.includes(".") && wfKnownVars.has(s.workflow.value) && !localScripts.has(s.workflow.value) && !localWorkflows.has(s.workflow.value)) {
           throw jaiphError(ast.filePath, s.workflow.loc.line, s.workflow.loc.col, "E_VALIDATE", `strings are not executable; "${s.workflow.value}" is a string — use a script instead`);
         }
@@ -899,12 +955,14 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
         if (s.managed) {
           if (s.managed.kind === "run") {
             validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "run", s.managed.args);
+            validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args);
             validateRef(s.managed.ref, ast, refCtx, expectRunTargetRef);
             validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "workflow", ast, refCtx);
 
             validateBareIdentifierArgs(ast.filePath, s.managed.ref.loc, s.managed.bareIdentifierArgs, wfKnownVars, recoverBindings);
           } else if (s.managed.kind === "ensure") {
             validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "ensure", s.managed.args);
+            validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args);
             validateRef(s.managed.ref, ast, refCtx, expectRuleRef);
             validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "rule", ast, refCtx);
 
@@ -957,6 +1015,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
         const v = s.value;
         if (v.kind === "run_capture") {
           validateNoShellRedirection(ast.filePath, v.ref.loc, "run", v.args);
+          validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args);
           if (!v.ref.value.includes(".") && wfKnownVars.has(v.ref.value) && !localScripts.has(v.ref.value) && !localWorkflows.has(v.ref.value)) {
             throw jaiphError(ast.filePath, v.ref.loc.line, v.ref.loc.col, "E_VALIDATE", `strings are not executable; "${v.ref.value}" is a string — use a script instead`);
           }
@@ -966,6 +1025,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
           validateBareIdentifierArgs(ast.filePath, v.ref.loc, v.bareIdentifierArgs, wfKnownVars, recoverBindings);
         } else if (v.kind === "ensure_capture") {
           validateNoShellRedirection(ast.filePath, v.ref.loc, "ensure", v.args);
+          validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args);
           validateRef(v.ref, ast, refCtx, expectRuleRef);
           validateArity(ast.filePath, v.ref.loc, v.ref.value, v.args, "rule", ast, refCtx);
 
diff --git a/test/sample-build.test.ts b/test/sample-build.test.ts
index 06d7f89e..2d8439bd 100644
--- a/test/sample-build.test.ts
+++ b/test/sample-build.test.ts
@@ -173,6 +173,40 @@ test("jaiph run compiles and executes workflow with args", () => {
   }
 });
 
+test("jaiph run resolves nested managed call arguments", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-run-nested-args-"));
+  try {
+    const filePath = join(root, "nested_args.jh");
+    writeFileSync(
+      filePath,
+      [
+        "script mkdir_p_simple = ```",
+        'mkdir -p "$1"',
+        "```",
+        "script jaiph_tmp_dir = ```",
+        'printf "%s\\n" "$JAIPH_WORKSPACE/.jaiph/tmp"',
+        "```",
+        "workflow default() {",
+        "  run mkdir_p_simple(run jaiph_tmp_dir())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const runResult = spawnSync("node", [cliPath, "run", filePath], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+
+    assert.equal(runResult.status, 0, runResult.stderr);
+    assert.equal(existsSync(join(root, ".jaiph", "tmp")), true);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
 test("executable .jh invokes jaiph run semantics", () => {
   const root = mkdtempSync(join(tmpdir(), "jaiph-exec-jh-"));
   try {

From d25fb3f8dd89ad1a5001af381cbf5cfed34ba9d9 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 21:07:56 +0200
Subject: [PATCH 03/38] Feat: Require explicit managed calls in nested argument
 lists

Enforce that nested call-like expressions inside argument positions must
use an explicit `run` or `ensure` keyword. Bare call-like forms
(`run foo(bar())`, `run foo(rule_bar())`, `run foo(\`echo x\`())`,
`const x = bar()`) are now rejected at compile time with actionable
error messages. The explicit forms (`run foo(run bar())`,
`run foo(ensure rule_bar())`, `run foo(run \`echo x\`())`) execute
the nested call first and pass the result as a single argument.

Validator extended with inline script detection, runtime evaluates
managed argument tokens before outer dispatch, and the formatter
round-trips all valid nested forms. Regression tests cover all
accepted and rejected patterns. Docs and grammar updated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                 |   4 +
 QUEUE.md                                     |  88 -----------
 docs/grammar.md                              |  34 +++-
 docs/jaiph-skill.md                          |   2 +-
 docs/language.md                             |  30 ++++
 src/format/emit.ts                           |  37 +++++
 src/runtime/docker.ts                        |  21 ++-
 src/runtime/kernel/node-workflow-runtime.ts  |  89 +++++++++--
 src/transpile/validate-managed-calls.test.ts | 157 +++++++++++++++++++
 src/transpile/validate.ts                    |  17 ++
 10 files changed, 371 insertions(+), 108 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eda20425..cd5409d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Unreleased
 
+## All changes
+
+- **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`).
+
 # 0.9.2
 
 ## Summary
diff --git a/QUEUE.md b/QUEUE.md
index 7f3cd348..993c8f50 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,93 +12,6 @@ Process rules:
 
 ***
 
-## Runtime/DSL — require explicit managed calls inside nested argument lists #dev-ready
-
-**Goal**
-Keep managed execution explicit everywhere, including inside argument lists. Nested argument-position managed calls are valid **only** when they use the correct keyword:
-
-* `run foo(run bar())`
-* `run foo(ensure rule_bar())`
-* `run foo(run \`echo "aaa"\`())`
-
-The bare call-like forms must stay invalid:
-
-* `run foo(bar())`
-* `run foo(rule_bar())`
-* `run foo(\`echo "aaa"\`())`
-* `const x = bar()`
-
-The explicit capture-then-pass form is also valid:
-
-* `const x = run bar()`
-* `run foo(x)`
-
-This is a deliberate language rule: scripts/workflows are only executable via `run`, and rules are only executable via `ensure`, even when nested inside another call's args.
-
-**Why this task exists**
-The runtime bug that treated `run foo(run bar())` as two literal argv tokens (`"run"`, `"bar()"`) proves the language intent is not being enforced correctly. The fix is to support **explicit** nested managed calls and reject **implicit** bare call-like execution.
-
-**Context**
-
-* Parser / call-arg parsing: `src/parse/core.ts`, `src/parse/workflows.ts`, `src/parse/steps.ts`
-* Validation: `src/transpile/validate.ts`, `src/transpile/validate-managed-calls.test.ts`
-* Runtime arg handling: `src/runtime/kernel/node-workflow-runtime.ts`
-* Formatting / round-trip emit: `src/format/emit.ts`
-* Real motivating workflow: `.jaiph/architect_review.jh`
-
-**Implementation requirements**
-
-1. **Language rule**
-   * Managed steps remain explicit everywhere.
-   * Inside call arguments, nested execution is allowed only via:
-     * `run ref(...)`
-     * `ensure ref(...)`
-     * `run \`...\`()`
-   * Bare nested call-like forms are invalid and must not be interpreted as executable.
-
-2. **Compiler behavior**
-   * Accept explicit nested managed calls in arg position.
-   * Reject bare nested call-like syntax in arg position with a clear parse/validate error.
-   * Reject bare call-like execution in `const` assignments as well.
-   * Error message must tell the user to add the missing `run` / `ensure` keyword, not to rely on implicit execution.
-
-3. **Runtime behavior**
-   * Runtime arg evaluation must execute explicit nested managed calls correctly and pass their resulting value as a single argument.
-   * Runtime must never interpret a bare call-like token as executable work.
-
-4. **Formatter / emitter**
-   * Ensure formatting round-trips valid nested explicit forms like `run foo(run bar())`.
-   * Ensure invalid bare nested call-like forms are not generated by the emitter.
-
-5. **Workflow update**
-   * Keep or update `.jaiph/architect_review.jh` to use the explicit nested form:
-     * `run mkdir_p_simple(run jaiph_tmp_dir())`
-
-6. **Tests**
-   * Add/keep a regression test proving `run mkdir_p_simple(jaiph_tmp_dir())` is rejected.
-   * Add/keep a regression test proving `run foo(rule_bar())` is rejected.
-   * Add/keep a regression test proving `run foo(\`echo "aaa"\`())` is rejected.
-   * Add/keep a regression test proving `const x = bar()` is rejected.
-   * Add/keep a positive test for `run mkdir_p_simple(run jaiph_tmp_dir())`.
-   * Add/keep a positive test for `run foo(ensure rule_bar())`.
-   * Add/keep a positive test for nested inline script form `run foo(run \`echo "aaa"\`())`.
-   * Add/keep a positive test for `const x = run bar()` followed by `run foo(x)`.
-
-**Acceptance criteria**
-
-* `run foo(run bar())` succeeds and passes the nested result as one argument.
-* `run foo(ensure rule_bar())` succeeds and passes the nested result as one argument.
-* `run foo(run \`echo "aaa"\`())` succeeds and passes the nested result as one argument.
-* `run foo(bar())` fails at compile time with an actionable error.
-* `run foo(rule_bar())` fails at compile time with an actionable error.
-* `run foo(\`echo "aaa"\`())` fails at compile time with an actionable error.
-* `const x = run bar()` is accepted.
-* `const x = bar()` fails at compile time with an actionable error.
-* `.jaiph/architect_review.jh` uses the explicit nested form.
-* Tests prevent regression back to implicit bare nested execution.
-
-***
-
 ## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR
 
 **Goal**
@@ -320,4 +233,3 @@ Extend the language with workflow parameters: `workflow analyze(file: string, de
 * E2E test passes.
 
 ***
-
diff --git a/docs/grammar.md b/docs/grammar.md
index 68e10383..d3707263 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -195,6 +195,33 @@ This rule applies to all call sites: `run`, `ensure`, `return run`/`return ensur
 
 Bare identifiers must reference a known variable (`const`, capture, or named parameter). Unknown names produce an `E_VALIDATE` error at compile time. Jaiph keywords (`run`, `ensure`, `const`, etc.) cannot be used as bare identifier arguments.
 
+### Nested Managed Calls in Arguments
+
+Call arguments can contain **explicit nested managed calls** using `run` or `ensure`. The nested call executes first and its result is passed as a single argument to the outer call. This is a deliberate language rule: managed execution must always be explicit — scripts and workflows execute only via `run`, rules only via `ensure`, even inside argument lists.
+
+**Valid explicit forms:**
+
+```jaiph
+run mkdir_p_simple(run jaiph_tmp_dir())      # nested run
+run do_work(ensure check_ok())               # nested ensure
+run do_work(run `echo aaa`())                # nested inline script
+```
+
+**Invalid bare call-like forms** — rejected at compile time with actionable errors:
+
+```jaiph
+# run do_work(bar())           — E_VALIDATE: nested managed calls must be explicit
+# run do_work(rule_bar())      — E_VALIDATE: nested managed calls must be explicit
+# run do_work(`echo aaa`())    — E_VALIDATE: nested inline scripts must be explicit
+```
+
+The **capture-then-pass** form is always valid:
+
+```jaiph
+const x = run bar()
+run foo(x)
+```
+
 ### Arity Checking
 
 When the callee declares named parameters, the compiler validates that the number of arguments at the call site matches the number of declared parameters. A mismatch produces an `E_VALIDATE` error:
@@ -841,6 +868,10 @@ ensure_stmt     = "ensure" call_ref [ "catch" catch_bindings catch_body ] ;
 run_catch_stmt  = "run" call_ref "catch" catch_bindings catch_body ;
 run_stmt        = "run" ( call_ref | inline_script ) ;
 call_ref        = REF "(" [ call_args ] ")" ;  (* parentheses always required *)
+call_arg        = double_quoted_string | IDENT | "${" IDENT "}"
+                | "run" ( call_ref | inline_script )       (* explicit nested managed call *)
+                | "ensure" call_ref ;                      (* explicit nested ensure *)
+call_args       = call_arg { "," call_arg } ;
 inline_script   = backtick_script_body "(" [ call_args ] ")" | fenced_script_block "(" [ call_args ] ")" ;
 prompt_body     = double_quoted_string | IDENT | triple_quoted_block ;
 triple_quoted_block = "\"\"\"" newline { body_line newline } "\"\"\"" ;
@@ -861,7 +892,7 @@ After parsing, the compiler validates references and config (`src/transpile/vali
 
 - **E_PARSE:** Invalid syntax — duplicate config, invalid keys/values, `$(…)` or `${var:-fallback}` in orchestration strings, `${...}` interpolation in script bodies, `prompt … returns` without capture, bare `ref(args)` in const RHS (use `run`/`ensure`/`prompt`), `local` at top level, unrecognized workflow/rule line, invalid send RHS, arguments after `catch`, bare `catch` with no recovery step, nested inline captures, shell redirection after `run`/`ensure`, invalid parameter names (non-identifier, duplicate, or reserved keyword), or missing `{` on definition line.
 - **E_SCHEMA:** Invalid `returns` schema — empty, non-flat, unsupported type (only `string`, `number`, `boolean`).
-- **E_VALIDATE:** Reference errors — unknown rule/workflow, duplicate alias, `ensure` on non-rule, `run` on rule, `run` to workflow inside rule, `run async` in rule, forbidden Jaiph usage inside `$(…)`, dot notation on non-prompt variable or invalid field name, bare identifier argument referencing an unknown variable, `${identifier}` in strings referencing an unknown variable, standalone `"${identifier}"` in call arguments (use bare identifier instead), arity mismatch (call-site argument count differs from callee's declared parameter count), **type crossing** — `prompt` with a script name (`scripts are not promptable`), `run` with a string const (`strings are not executable`), `const x = scriptName` (`scripts are not values`), `${scriptName}` interpolation (`scripts cannot be interpolated`).
+- **E_VALIDATE:** Reference errors — unknown rule/workflow, duplicate alias, `ensure` on non-rule, `run` on rule, `run` to workflow inside rule, `run async` in rule, forbidden Jaiph usage inside `$(…)`, dot notation on non-prompt variable or invalid field name, bare identifier argument referencing an unknown variable, `${identifier}` in strings referencing an unknown variable, standalone `"${identifier}"` in call arguments (use bare identifier instead), arity mismatch (call-site argument count differs from callee's declared parameter count), **bare nested managed calls** — `run foo(bar())` or `run foo(rule_bar())` without explicit `run`/`ensure` keyword, **bare nested inline script calls** — `run foo(\`echo aaa\`())` without explicit `run`, **type crossing** — `prompt` with a script name (`scripts are not promptable`), `run` with a string const (`strings are not executable`), `const x = scriptName` (`scripts are not values`), `${scriptName}` interpolation (`scripts cannot be interpolated`).
 - **E_IMPORT_NOT_FOUND:** Import target file does not exist.
 
 Validation rules:
@@ -875,6 +906,7 @@ Validation rules:
 7. `ensure … catch` and `run … catch` argument ordering: all arguments inside parentheses before `catch`.
 8. Shell redirection (`>`, `|`, `&`) after `run`/`ensure` is rejected — use a script.
 9. **Type crossing:** `string` and `script` are non-interchangeable primitive types (see [Types](#types)). `prompt` rejects script names; `run` rejects string consts; assigning a script to a `const` or interpolating a script name with `${…}` is rejected. Each crossing produces an actionable `E_VALIDATE` message.
+10. **Explicit nested managed calls:** Bare call-like forms in argument position (`run foo(bar())`, `run foo(rule_bar())`) are rejected — add the missing `run` or `ensure` keyword. Bare inline script calls in arguments (`run foo(\`echo aaa\`())`) are also rejected — add `run`. Valid forms: `run foo(run bar())`, `run foo(ensure rule_bar())`, `run foo(run \`echo aaa\`())`.
 
 ## Build Artifacts {#build-artifacts}
 
diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md
index a7dd9c30..f170b6ef 100644
--- a/docs/jaiph-skill.md
+++ b/docs/jaiph-skill.md
@@ -95,7 +95,7 @@ Prefer composable modules over one large file.
 - **Module-scoped variables:** `local name = value` or `const name = value` (same value forms). Prefer **`const`** for new files. Values can be single-line `"..."` strings, triple-quoted `"""..."""` multiline strings, or bare tokens. A double-quoted string that spans multiple lines is rejected — use `"""..."""` instead. Accessible as `${name}` inside orchestration strings in the same module. Names share the unified namespace with channels, rules, workflows, and scripts — duplicates are `E_PARSE`. Not exportable; module-scoped only.
 - **Steps:**
   - **ensure** — `ensure ref` or `ensure ref([args...])` runs a rule (local or `alias.rule_name`). **Parentheses are optional when passing zero arguments** — `ensure check` is equivalent to `ensure check()`. When arguments are present, parentheses are required with comma-separated expressions. **Bare identifier arguments** are supported and preferred: `ensure check(status)` is equivalent to `ensure check("${status}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead. Optionally `ensure ref([args]) catch (<name>) <body>` or `ensure ref([args]) catch (<name>, <attempt>) <body>`: the recovery body runs **once** on failure (like a catch clause). There is no retry loop — for retries, use explicit recursion. The first binding (e.g. `failure`) receives the full merged stdout+stderr from the failed rule execution, including output from nested scripts and rules. The optional second binding (e.g. `attempt`) receives the attempt number (always `"1"`). Full output still lives in step **`.out` / `.err`** artifacts. If the failure binding is empty for your rule, persist diagnostics before prompting or assert non-empty. Works in both workflows and rules.
-  - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. Optionally `run ref([args]) catch (<name>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above.
+  - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<name>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above.
   - **log** — `log "message"` writes the expanded message to **stdout** and emits a **`LOG`** event; the CLI shows it in the progress tree at the current depth. Double-quoted string; `${identifier}` interpolation works at runtime. For multiline messages, use triple quotes: `log """..."""`. **Bare identifier form:** `log foo` (no quotes) expands to `log "${foo}"` — the variable's value is logged. Works with `const`, capture, and named parameters. **Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` execute a managed call and inline the result (e.g. `log "Got: ${run greet()}"`). Nested inline captures are rejected. **`LOG`** events and `run_summary.jsonl` store the **same** message string (JSON-escaped for the payload). No spinner, no timing — a static annotation. See [CLI Reference](cli.md) for tree formatting. Useful for marking workflow phases (e.g. `log "Starting analysis phase"`).
   - **logerr** — `logerr "message"` is identical to `log` except the message goes to **stderr** and the event type is **`LOGERR`**. In the progress tree, `logerr` lines use a red `!` instead of the dim `ℹ` used by `log`. Same quoting, interpolation, bare identifier, and triple-quote rules as `log` (e.g. `logerr err_msg`, `logerr """..."""`).
   - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (forward syntax) has been removed. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md).
diff --git a/docs/language.md b/docs/language.md
index 504b0872..c1a82d16 100644
--- a/docs/language.md
+++ b/docs/language.md
@@ -248,6 +248,36 @@ run process(task, "extra context")   # mixed bare + quoted
 run process("${task}")              # equivalent to bare form
 ```
 
+### Nested Managed Calls in Arguments
+
+Call arguments can contain nested managed calls — but the `run` or `ensure` keyword must be explicit. This is a deliberate language rule: scripts and workflows execute only via `run`, and rules execute only via `ensure`, even when nested inside another call's arguments.
+
+**Valid — explicit nested calls:**
+
+```jaiph
+run mkdir_p_simple(run jaiph_tmp_dir())
+run do_work(ensure check_ok())
+run do_work(run `echo aaa`())
+```
+
+The nested call executes first and its result is passed as a single argument to the outer call.
+
+**Invalid — bare call-like forms:**
+
+```jaiph
+# run do_work(bar())          — E_VALIDATE: use "run bar()" or "ensure bar()"
+# run do_work(rule_bar())     — E_VALIDATE: use "ensure rule_bar()"
+# run do_work(`echo aaa`())   — E_VALIDATE: use "run `...`()"
+# const x = bar()             — E_PARSE: use "const x = run bar()"
+```
+
+The explicit capture-then-pass form is also valid:
+
+```jaiph
+const x = run bar()
+run foo(x)
+```
+
 ### Arity Checking
 
 When the callee declares named parameters, the compiler validates argument count:
diff --git a/src/format/emit.ts b/src/format/emit.ts
index bd7c7e08..20f02a35 100644
--- a/src/format/emit.ts
+++ b/src/format/emit.ts
@@ -371,6 +371,33 @@ function emitSteps(steps: WorkflowStepDef[], pad: string, currentIndent: string)
   return lines;
 }
 
+/** Try to parse `` `body`(args) `` from the start of a string. Returns consumed length or null. */
+function parseInlineScriptArg(s: string): { body: string; innerArgs: string; consumed: number } | null {
+  if (!s.startsWith("`")) return null;
+  const closeIdx = s.indexOf("`", 1);
+  if (closeIdx === -1) return null;
+  const body = s.slice(1, closeIdx);
+  const afterClose = s.slice(closeIdx + 1);
+  if (!afterClose.startsWith("(")) return null;
+  let depth = 1;
+  let j = 1;
+  let inQuote: string | null = null;
+  while (j < afterClose.length && depth > 0) {
+    const ch = afterClose[j];
+    if (inQuote) {
+      if (ch === inQuote && afterClose[j - 1] !== "\\") inQuote = null;
+    } else {
+      if (ch === '"' || ch === "'") inQuote = ch;
+      else if (ch === "(") depth++;
+      else if (ch === ")") depth--;
+    }
+    j++;
+  }
+  if (depth !== 0) return null;
+  const innerArgs = afterClose.slice(1, j - 1).trim();
+  return { body, innerArgs, consumed: closeIdx + 1 + j };
+}
+
 /** Convert space-separated args back to comma-separated format with bare identifiers. */
 function formatArgs(args: string, bareIdentifierArgs?: string[]): string {
   const bare = new Set(bareIdentifierArgs ?? []);
@@ -395,6 +422,16 @@ function formatArgs(args: string, bareIdentifierArgs?: string[]): string {
         i += keyword.length + skipped + consumed;
         continue;
       }
+      // Try inline script form: run `body`(args)
+      if (keyword === "run") {
+        const inlineResult = parseInlineScriptArg(afterKeyword);
+        if (inlineResult) {
+          const formattedInner = inlineResult.innerArgs ? formatArgs(inlineResult.innerArgs) : "";
+          tokens.push(`run \`${inlineResult.body}\`(${formattedInner})`);
+          i += keyword.length + skipped + inlineResult.consumed;
+          continue;
+        }
+      }
     }
     if (args[i] === '"') {
       let j = i + 1;
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 3382c1a3..400d6f4a 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -396,11 +396,24 @@ if [ "$overlay_ok" -ne 1 ]; then
       printf '\n' >&2
     fi
   else
-    printf 'jaiph docker: workspace overlay unavailable and rsync copy fallback is unavailable; /jaiph/workspace may be incomplete' >&2
-    if [ -n "$overlay_reason" ]; then
-      printf ' (%s)' "$overlay_reason" >&2
+    if cp -a "$LOWER"/. "$MERGED"/ 2>/tmp/jaiph-workspace-cp.err; then
+      printf 'jaiph docker: workspace overlay unavailable; using cp fallback at /jaiph/workspace' >&2
+      if [ -n "$overlay_reason" ]; then
+        printf ' (%s)' "$overlay_reason" >&2
+      fi
+      printf '\n' >&2
+      overlay_ok=1
+    else
+      cp_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-cp.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
+      printf 'jaiph docker: workspace overlay unavailable and copy fallbacks are unavailable; /jaiph/workspace may be incomplete' >&2
+      if [ -n "$overlay_reason" ]; then
+        printf ' (%s)' "$overlay_reason" >&2
+      fi
+      if [ -n "$cp_reason" ]; then
+        printf ' [cp fallback: %s]' "$cp_reason" >&2
+      fi
+      printf '\n' >&2
     fi
-    printf '\n' >&2
   fi
 fi
 exec "$@"
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index c6ee76ad..9678840c 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -162,7 +162,37 @@ function parseArgsRaw(raw: string, vars: Map<string, string>, env?: NodeJS.Proce
 
 type ParsedArgToken =
   | { kind: "literal"; value: string }
-  | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string };
+  | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string }
+  | { kind: "managed_inline_script"; body: string; lang?: string; argsRaw: string };
+
+/** Try to parse `\`body\`(args)` from a string at a given position. */
+function parseInlineScriptAt(s: string): { body: string; argsRaw: string; consumed: number } | null {
+  const t = s.trimStart();
+  const skippedWs = s.length - t.length;
+  if (!t.startsWith("`")) return null;
+  const closeIdx = t.indexOf("`", 1);
+  if (closeIdx === -1) return null;
+  const body = t.slice(1, closeIdx);
+  const afterClose = t.slice(closeIdx + 1);
+  if (!afterClose.startsWith("(")) return null;
+  let depth = 1;
+  let i = 1;
+  let inQuote: string | null = null;
+  while (i < afterClose.length && depth > 0) {
+    const ch = afterClose[i];
+    if (inQuote) {
+      if (ch === inQuote && afterClose[i - 1] !== "\\") inQuote = null;
+    } else {
+      if (ch === '"' || ch === "'") inQuote = ch;
+      else if (ch === "(") depth++;
+      else if (ch === ")") depth--;
+    }
+    i++;
+  }
+  if (depth !== 0) return null;
+  const argsContent = afterClose.slice(1, i - 1).trim();
+  return { body, argsRaw: argsContent, consumed: skippedWs + closeIdx + 1 + i };
+}
 
 function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken; next: number } | null {
   const tail = raw.slice(start);
@@ -175,18 +205,33 @@ function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken;
   const afterKeyword = raw.slice(start + keyword.length).trimStart();
   const skipped = raw.slice(start + keyword.length).length - afterKeyword.length;
   const call = parseCallRef(afterKeyword);
-  if (!call) return null;
-  if (call.rest.length > 0 && !/^\s/.test(call.rest)) return null;
-  const consumed = afterKeyword.length - call.rest.length;
-  return {
-    token: {
-      kind: "managed",
-      managedKind: keyword,
-      ref: call.ref,
-      argsRaw: call.args ?? "",
-    },
-    next: start + keyword.length + skipped + consumed,
-  };
+  if (call && (call.rest.length === 0 || /^\s/.test(call.rest))) {
+    const consumed = afterKeyword.length - call.rest.length;
+    return {
+      token: {
+        kind: "managed",
+        managedKind: keyword,
+        ref: call.ref,
+        argsRaw: call.args ?? "",
+      },
+      next: start + keyword.length + skipped + consumed,
+    };
+  }
+  // Try inline script form: run `body`(args)
+  if (keyword === "run") {
+    const inlineResult = parseInlineScriptAt(afterKeyword);
+    if (inlineResult) {
+      return {
+        token: {
+          kind: "managed_inline_script",
+          body: inlineResult.body,
+          argsRaw: inlineResult.argsRaw,
+        },
+        next: start + keyword.length + skipped + inlineResult.consumed,
+      };
+    }
+  }
+  return null;
 }
 
 function parseArgTokens(raw: string): ParsedArgToken[] {
@@ -1297,6 +1342,16 @@ export class NodeWorkflowRuntime {
     return `${filePath}::${name}`;
   }
 
+  /** Synchronous fast-path: resolve args when every token is a plain literal. */
+  private resolveArgsRawSync(scope: Scope, raw: string | string[]): string[] | null {
+    if (Array.isArray(raw)) return raw;
+    const tokens = parseArgTokens(raw);
+    for (const token of tokens) {
+      if (token.kind !== "literal") return null;
+    }
+    return tokens.map((t) => interpolate((t as { kind: "literal"; value: string }).value, scope.vars, scope.env));
+  }
+
   private async resolveArgsRaw(scope: Scope, raw: string | string[]): Promise<string[] | StepResult> {
     if (Array.isArray(raw)) {
       return raw;
@@ -1308,6 +1363,12 @@ export class NodeWorkflowRuntime {
         resolved.push(interpolate(token.value, scope.vars, scope.env));
         continue;
       }
+      if (token.kind === "managed_inline_script") {
+        const result = await this.executeInlineScript(scope, token.body, undefined, token.argsRaw);
+        if (result.status !== 0) return result;
+        resolved.push(result.returnValue ?? result.output.trim());
+        continue;
+      }
       const result = token.managedKind === "run"
         ? await this.executeRunRef(scope, token.ref, token.argsRaw)
         : await this.executeEnsureRef(scope, token.ref, token.argsRaw, undefined);
@@ -1320,7 +1381,7 @@ export class NodeWorkflowRuntime {
   }
 
   private async executeRunRef(scope: Scope, ref: string, argsRaw: string | string[]): Promise<StepResult> {
-    const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw);
+    const resolvedArgs = this.resolveArgsRawSync(scope, argsRaw) ?? await this.resolveArgsRaw(scope, argsRaw);
     if (!Array.isArray(resolvedArgs)) return resolvedArgs;
     const args = resolvedArgs;
     const resolvedWorkflow = resolveWorkflowRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } });
diff --git a/src/transpile/validate-managed-calls.test.ts b/src/transpile/validate-managed-calls.test.ts
index d5bb238b..baf91ec6 100644
--- a/src/transpile/validate-managed-calls.test.ts
+++ b/src/transpile/validate-managed-calls.test.ts
@@ -464,3 +464,160 @@ test("E_VALIDATE: ${arg1} in log is unknown identifier", () => {
     rmSync(root, { recursive: true, force: true });
   }
 });
+
+// --- Explicit nested managed call tests ---
+
+test("buildScripts accepts run foo(run bar()) — explicit nested managed call", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-run-"));
+  const out = join(root, "out");
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script mkdir_p_simple = `mkdir -p "$1"`',
+        'script jaiph_tmp_dir = `printf "%s\\n" "/tmp/jaiph"`',
+        "workflow default() {",
+        "  run mkdir_p_simple(run jaiph_tmp_dir())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    buildScripts(join(root, "m.jh"), out);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("buildScripts accepts run foo(ensure rule_bar()) — explicit nested ensure", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-ensure-"));
+  const out = join(root, "out");
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script do_work = `echo "$1"`',
+        "rule check_ok() {",
+        '  run do_work("ok")',
+        "}",
+        "workflow default() {",
+        "  run do_work(ensure check_ok())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    buildScripts(join(root, "m.jh"), out);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("buildScripts accepts run foo(run `echo aaa`()) — explicit nested inline script", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-inline-"));
+  const out = join(root, "out");
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script do_work = `echo "$1"`',
+        "workflow default() {",
+        "  run do_work(run `echo aaa`())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    buildScripts(join(root, "m.jh"), out);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("buildScripts accepts const x = run bar() followed by run foo(x)", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-capture-then-pass-"));
+  const out = join(root, "out");
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script bar = `echo "hello"`',
+        'script foo = `echo "$1"`',
+        "workflow default() {",
+        "  const x = run bar()",
+        "  run foo(x)",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    buildScripts(join(root, "m.jh"), out);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("E_VALIDATE: run foo(rule_bar()) — bare rule call in args is rejected", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-bare-rule-"));
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script do_work = `echo "$1"`',
+        "rule rule_bar() {",
+        '  run do_work("ok")',
+        "}",
+        "workflow default() {",
+        "  run do_work(rule_bar())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    assert.throws(
+      () => buildScripts(join(root, "m.jh"), join(root, "out")),
+      /nested managed calls in argument position must be explicit/,
+    );
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("E_VALIDATE: run foo(`echo aaa`()) — bare inline script call in args is rejected", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-bare-inline-"));
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script do_work = `echo "$1"`',
+        "workflow default() {",
+        "  run do_work(`echo aaa`())",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    assert.throws(
+      () => buildScripts(join(root, "m.jh"), join(root, "out")),
+      /nested inline script calls in argument position must be explicit/,
+    );
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("E_VALIDATE: const x = bar() — bare call in const assignment is rejected", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-val-const-bare-call-"));
+  try {
+    writeFileSync(
+      join(root, "m.jh"),
+      [
+        'script bar = `echo "hello"`',
+        "workflow default() {",
+        "  const x = bar()",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    assert.throws(
+      () => buildScripts(join(root, "m.jh"), join(root, "out")),
+      /Script calls in const assignments must use run/,
+    );
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
diff --git a/src/transpile/validate.ts b/src/transpile/validate.ts
index 5b3d18ba..c197bccf 100644
--- a/src/transpile/validate.ts
+++ b/src/transpile/validate.ts
@@ -302,6 +302,23 @@ function validateNestedManagedCallArgs(
       `nested managed calls in argument position must be explicit; use "run ${match[1]}(...)" or "ensure ${match[1]}(...)" inside the argument list`,
     );
   }
+  // Detect bare inline script calls: `body`() without preceding run/ensure
+  const btRe = /`[^`]*`\s*\(/g;
+  let btMatch: RegExpExecArray | null;
+  while ((btMatch = btRe.exec(stripped)) !== null) {
+    const before = stripped.slice(0, btMatch.index).trimEnd();
+    const lastToken = before.length === 0 ? "" : before.slice(before.lastIndexOf(" ") + 1);
+    if (lastToken === "run" || lastToken === "ensure") {
+      continue;
+    }
+    throw jaiphError(
+      filePath,
+      loc.line,
+      loc.col,
+      "E_VALIDATE",
+      `nested inline script calls in argument position must be explicit; use "run \`...\`(...)" inside the argument list`,
+    );
+  }
 }
 
 /** Resolve a route target workflow ref to its declared parameter count. Returns undefined if unresolvable. */

From 955a67b8f3a4388197a5d44e1ded3f11c07e18e7 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 21:32:52 +0200
Subject: [PATCH 04/38] Queue: Harden docker tasks, add
 version/name/description for jaiph config file

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 QUEUE.md | 214 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 125 insertions(+), 89 deletions(-)

diff --git a/QUEUE.md b/QUEUE.md
index 993c8f50..0b34ce01 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,152 +12,149 @@ Process rules:
 
 ***
 
-## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR
+## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR #dev-ready
 
 **Goal**
-Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime, must **not** mount host `dist/` into the container, and must **not** auto-install itself into arbitrary base images. The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph.
+Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime and must **not** auto-install itself into arbitrary base images. (Today the host uses `npm pack` + `docker build` to install the local package into a derived image; there is no bind-mount of host `dist/`, but that derived-image install path is equally forbidden.) The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph.
 
-At the same time, publish an official Jaiph runtime image to **GHCR** and make it the default Docker image:
+Publish an official Jaiph runtime image to **GHCR** and make it the default Docker image:
 
 * tagged releases → `ghcr.io/jaiphlang/jaiph-runtime:<version>`
 * nightly builds → `ghcr.io/jaiphlang/jaiph-runtime:nightly`
-* default runtime image in Jaiph config/runtime should point at that official image
+* default `runtime.docker_image` / env default should point at that official image
 
-This is a deliberate contract change. Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired.
+Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired.
 
 **Required product decision**
 
 1. **Strict requirement** — all Docker images used by Jaiph must already have `jaiph`.
 2. **Official default image** — Jaiph publishes and uses `ghcr.io/jaiphlang/jaiph-runtime`.
-3. **No hidden runtime mutation** — no auto-derived image build, no host `dist/` mount hack, no `npm install -g` during Docker run startup.
+3. **No hidden runtime mutation** — no auto-derived image build, no `npm install -g` of Jaiph during Docker run startup.
 4. **Fast fail** — if the chosen image lacks `jaiph`, Jaiph must fail clearly with an explicit Docker/runtime error.
 
 **Why this task exists**
 
-The current codebase has tension between two incompatible models:
+The codebase currently mixes a generic contract (`jaiph run --raw` inside the container) with a convenience path (stock images without `jaiph`). Both cannot be true without bootstrapping. This task chooses the strict model and removes the second.
 
-* generic Docker contract: run `jaiph run --raw` inside the container
-* convenience contract: allow stock images that do not contain `jaiph`
+**Critical implementation detail (from current `src/runtime/docker.ts`)**
 
-Both cannot be true without runtime bootstrapping. This task intentionally chooses the first model and removes the second.
+When `imageExplicit === false`, `resolveImage` currently ends in `ensureLocalRuntimeImage`, which **always** targets a derived `jaiph-runtime-auto:*` tag built via `npm pack`, even if the base image already contains `jaiph`. After switching the default to the official GHCR image (or any image that already has `jaiph`), the runtime must **use that image as-is** when `command -v jaiph` succeeds — no auto-derivation. If `jaiph` is missing, fail fast (no fallback build).
+
+**Resolved defaults (no longer open)**
+
+* **Default tag rule**: Release npm builds embed `ghcr.io/jaiphlang/jaiph-runtime:<semver>` matching the package/`jaiph` version. Main/nightly CI artifacts and docs for contributors use the `:nightly` tag; state the rule explicitly in docs.
+* **Cursor / Claude CLIs in the official image**: **Exclude by default** from the minimal `jaiph-runtime` image to keep size and supply chain small; document how to extend a custom image (the managed `.jaiph/Dockerfile` template may remain a fuller example).
+
+**Queue coordination**
+
+Ship published GHCR images before or together with the later queued task “Runtime — default Docker when not CI or unsafe”, which will expect a pullable default image for local users.
 
 **Context**
 
-* Docker runtime implementation: `src/runtime/docker.ts`
-* Docker run path / spawn site: `src/cli/commands/run.ts`
-* Docker docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`
-* Current Docker E2E coverage: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh`
-* Managed project Dockerfile template: `.jaiph/Dockerfile`, plus `jaiph init` scaffolding in `src/cli/commands/init.ts`
-* CI/release workflows: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml`
+* Docker runtime: `src/runtime/docker.ts`
+* Docker run path: `src/cli/commands/run.ts`
+* Docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`
+* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh`
+* Managed Dockerfile: `.jaiph/Dockerfile`, `src/cli/commands/init.ts`
+* CI: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml`
 
 **Implementation requirements**
 
-1. **Runtime**
-   * Remove Docker fallback logic that auto-builds a derived image or auto-installs Jaiph into arbitrary base images.
-   * Keep the container entry generic: `jaiph run --raw ...`
-   * Add an explicit preflight/validation step for Docker images:
-     * either the selected image is the official `ghcr.io/jaiphlang/jaiph-runtime:*`,
-     * or a custom image that already contains `jaiph`.
-   * If `jaiph` is missing in the chosen image, fail with a clear error message that tells the user to:
-     * use the official GHCR image, or
-     * install Jaiph in their custom image.
-
-2. **Default image**
-   * Change the default Docker image away from `node:20-bookworm`.
-   * Default must become the official GHCR runtime image.
-   * Decide whether the default tag should be version-pinned at release time and `nightly` on main/nightly builds; document the exact rule.
-
-3. **Publishing**
-   * Add CI/release automation to build and publish `ghcr.io/jaiphlang/jaiph-runtime`.
-   * Publish at least:
-     * per-tag release images
-     * `nightly`
-   * Ensure the published image contains:
-     * `jaiph`
-     * Node.js
-     * `fuse-overlayfs` / Docker runtime prerequisites
-     * non-root runtime user if that remains part of the sandbox contract
-   * Decide whether Cursor / Claude CLIs belong in the official runtime image by default; document the decision explicitly.
-
-4. **Docs**
-   * Rewrite Docker docs to state the strict image contract clearly.
-   * Document the official GHCR image as the default and recommended path.
-   * Document how custom images must install `jaiph`.
-   * Remove any wording that implies Jaiph will make arbitrary base images work automatically.
-
-5. **Tests**
-   * Update E2E/tests so they assert the strict contract, not the bootstrap fallback.
-   * In particular, tests that currently expect `node:20-bookworm` to work without Jaiph must be rewritten.
-   * Add/keep a regression test that proves Docker fails clearly when the selected image lacks `jaiph`.
+1. **Runtime** — Remove `ensureLocalRuntimeImage` / `buildRuntimeImageFromLocalPackage` / auto-derivation paths. Keep container entry `jaiph run --raw ...`. Preflight: after pull, verify `jaiph` exists in the selected image; if not, error with guidance to use `ghcr.io/jaiphlang/jaiph-runtime` or install Jaiph in a custom image. Preflight is by capability check, not by image name whitelist.
+2. **Default image** — Default becomes the official GHCR runtime image (not `node:20-bookworm`).
+3. **Publishing** — CI/release builds and pushes `ghcr.io/jaiphlang/jaiph-runtime` for release tags and `nightly`. Image includes Node.js, `jaiph`, `fuse-overlayfs` (and other sandbox prereqs per `.jaiph/Dockerfile`), and non-root user if that remains the contract.
+4. **Docs** — Rewrite Docker sections for the strict contract; remove language about auto-derived images and stock bases “just working.”
+5. **Tests** — Update E2E for strict contract; add/keep regression that an image without `jaiph` fails with a clear error.
+
+**Scope note**
+
+Expect changes across more than three files (runtime, CI workflows, init scaffolding, docs, E2E, unit tests). Prefer plain functions and small helpers; `docker.ts` is already large—avoid speculative abstractions.
 
 **Acceptance criteria**
 
 * Default Docker image is `ghcr.io/jaiphlang/jaiph-runtime:*`, not `node:20-bookworm`.
 * Jaiph never auto-builds a derived runtime image at Docker run time.
-* Jaiph never mounts host build output into the container to provide `jaiph`.
+* Jaiph never injects Jaiph into the container except by using an image that already contains it (no `npm pack` bootstrap).
 * A custom image without `jaiph` fails fast with a clear actionable error.
 * Official GHCR runtime images are published for release tags and `nightly`.
 * Docs describe the strict contract and official image flow without ambiguity.
-* Unit + E2E coverage prevents regression back to runtime bootstrap behavior.
+* Unit + E2E coverage prevents regression to bootstrap behavior.
 
-***
+## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready
 
-## Support optional config properties in Jaiph DSL: version, name, description.
+**Goal**
 
-## Runtime — credential proxy for Docker mode
+Add optional module-scoped manifest fields in the module-level `config { }` block so a `.jh` file can declare human-readable **name**, **version**, and **description** without changing agent/run/runtime execution.
 
-**Goal**
-Containers should never hold real API keys. Implement a host-side HTTP proxy (the "Phantom Token" pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key before forwarding upstream. The agent inside the container literally cannot leak the real key — it never has it.
+**Keys (dot-separated, string values)**
 
-**Design**
+- `module.name`
+- `module.version`
+- `module.description`
 
-1. **Host-side proxy** — a lightweight `http.createServer` bound to `127.0.0.1:<port>` (macOS/WSL2) or the `docker0` bridge IP (Linux). Receives requests from the container, swaps `x-api-key: placeholder` with the real key from host env, forwards to the upstream API, pipes the response back (including streaming SSE).
-2. **Container env injection** — instead of passing `ANTHROPIC_API_KEY=$real_key` into `docker run`, pass `ANTHROPIC_API_KEY=placeholder` + `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>`.
-3. **Multi-backend routing** — Jaiph supports Claude and Cursor backends. Each backend's CLI must respect a base URL override env var. `claude` CLI supports `ANTHROPIC_BASE_URL`; `cursor-agent` may not — needs investigation.
-4. **Lifecycle** — proxy starts before the first Docker container launch, shuts down after the last container exits or on Jaiph process exit.
+All optional; omitted keys leave the corresponding field unset.
 
-**Context**
+**Semantics**
+
+- Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it.
+- **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`.
+- Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata).
+
+**Implementation touchpoints**
 
-* Pattern reference: [NanoClaw's credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/) — same approach, independently arrived at.
-* Current Docker execution path: `src/runtime/kernel/` — Docker run/exec logic, env var forwarding.
-* Dockerfile: `.jaiph/Dockerfile` — container image setup.
-* Backend CLI invocation: `src/runtime/kernel/node-workflow-runtime.ts` — where `claude` / `cursor-agent` commands are constructed with env vars.
+- `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`.
+- `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`.
+- `src/format/emit.ts` — formatter round-trip for the new keys.
+- `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`).
+- Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations.
+- Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`).
 
-**Open questions**
+**Non-goals**
 
-* Does `cursor-agent` support a base URL override? If not, the proxy pattern may require a wrapper script or LD\_PRELOAD-based interception inside the container.
-* Single port with path-based routing vs one port per backend?
-* Should the proxy also enforce rate limits or audit-log API calls?
+- Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation.
+
+**Queue coordination**
+
+- No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings.
 
 **Acceptance criteria**
 
-* Host-side proxy starts automatically when Docker mode is active.
-* Containers receive only placeholder credentials — no real API keys in container env.
-* `claude` CLI calls from inside Docker succeed via the proxy.
-* Proxy handles streaming responses (SSE) correctly.
-* Real keys never appear in container logs, env dumps, or process listings.
-* Platform-specific host address resolution works (macOS, Linux).
+- Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`.
+- Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules).
+- Unit tests cover happy path and workflow rejection; docs and grammar list the keys.
 
-***
+**Scope note**
+
+- Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers.
 
-## Runtime — harden Docker execution environment
+## Runtime — harden Docker execution environment #dev-ready
 
 **Goal**
-Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, image supply chain, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage.
+Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage. (Image provenance and the official default image belong to the queued **Docker — strict image contract + GHCR** task; this task only documents or tightens runtime-visible pull/verify behavior as needed, without redefining publishing or the default image.)
 
 **Context**
 
-* Docker runtime: `src/runtime/kernel/` — look for `docker.ts` or Docker-related logic in the run path.
-* E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`.
-* Config: `runtime.docker_enabled`, `runtime.docker_timeout`, `runtime.workspace` keys in `src/config.ts` and metadata parsing.
+* Docker runtime: `src/runtime/docker.ts` (`parseMounts` / `validateMounts`, `resolveDockerConfig`, `buildDockerArgs`, `checkDockerAvailable`, `spawnDockerProcess`); CLI integration: `src/cli/commands/run.ts`.
+* Current forwarding: `buildDockerArgs` remaps `JAIPH_WORKSPACE` and `JAIPH_RUNS_DIR`, passes through `JAIPH_*` except `JAIPH_DOCKER_*`, and passes keys prefixed `CURSOR_`, `ANTHROPIC_`, or `CLAUDE_` (see `AGENT_ENV_PREFIXES` in `docker.ts`). Mounts come from resolved `runtime.workspace` plus fixed rw run-dir, ro overlay script, and `--device /dev/fuse`.
+* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`.
+* Config: `runtime.docker_enabled`, `runtime.docker_image`, `runtime.docker_network`, `runtime.docker_timeout`, `runtime.workspace` via `src/config.ts` and metadata parsing.
+
+**Queue coordination**
+
+* Land after or together with **Docker — strict image contract + publish official `jaiph-runtime` images to GHCR** so bootstrap removal and default image changes are settled before deep hardening refactors the same code paths.
+* Land after or together with **Runtime — credential proxy for Docker mode** so any env allowlist/denylist and `docs/sandboxing.md` text stay consistent with placeholder `ANTHROPIC_*` and host-reachable API base URLs (no real secrets in `-e`).
+* The later task **Runtime — default Docker when not CI or unsafe** changes `runtime.docker_enabled` defaults; avoid conflicting precedence — document how hardened Docker behavior interacts with that default once both exist.
 
 **Acceptance criteria**
 
-* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / isn't protecting against.
-* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist, safer defaults) with unit tests.
+* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / is not protecting against (including that hooks run on the host).
+* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist aligned with the credential-proxy contract, safer defaults) with unit tests.
 * No silent widen of host access without opt-in.
+* Document network mode behavior (`runtime.docker_network` / `--network`) and failure modes for missing Docker or failed pulls (`E_DOCKER_*`), extending existing patterns where appropriate.
 
-***
+**Scope note**
+
+* `docker.ts` is already large (~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle.
 
 ## Runtime — default Docker when not CI or unsafe #dev-ready
 
@@ -180,6 +177,45 @@ Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker def
 
 ***
 
+## Runtime — credential proxy for Docker mode
+
+**Goal**
+Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. The workload in the container never receives the real secret.
+
+**Design**
+
+1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:<ephemeral-port>`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration, stream the response back (including SSE).
+2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=<placeholder>` and `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>` (or `http://<host-gateway>:<port>`). Never pass the real key in `-e`.
+3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container.
+4. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there.
+5. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred.
+6. **Non-goals (v1)** — Rate limits and audit logging.
+7. **Lifecycle** — Start the proxy before the first `spawnDockerProcess` for that Jaiph process; stop it when tearing down the Docker run (and on Jaiph exit), with reference counting if multiple Docker runs can occur in one process.
+
+**Context**
+
+* Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/).
+* **Implementation touchpoints** — `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags), `src/cli/commands/run.ts` (spawn/cleanup lifecycle). Agent CLI args/env preparation: `src/runtime/kernel/prompt.ts` (likely unchanged).
+* Image template: `.jaiph/Dockerfile`.
+
+**Queue coordination**
+
+* This edits the same `docker.ts` / Docker spawn path as the queued **Docker — strict image contract + GHCR** task—land together or immediately after to reduce merge churn.
+* Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands.
+
+**Acceptance criteria**
+
+* Host-side proxy starts automatically when Docker mode is active (Anthropic/Claude path).
+* Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment.
+* `claude` CLI calls from inside Docker succeed via the proxy.
+* Proxy handles streaming responses (SSE) correctly.
+* Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder).
+* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address).
+
+**Scope note**
+
+* Target **~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers.
+
 ## `jaiph serve` — expose workflows as an MCP server #dev-ready
 
 **Goal**

From d398b7a51f6dbb14ad86f1f82203135d995b2828 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 21:51:57 +0200
Subject: [PATCH 05/38] Feat: Enforce strict Docker image contract and publish
 official GHCR runtime images

Remove all auto-derivation and runtime bootstrap paths from Docker mode.
The runtime no longer builds derived images via npm pack or installs
jaiph into arbitrary base images at run time. Every Docker image must
already contain a working jaiph CLI; missing jaiph now fails fast with
an actionable error.

Default docker_image switches from node:20-bookworm to the official
ghcr.io/jaiphlang/jaiph-runtime image. A new CI workflow publishes
that image for release tags and nightly builds. Docs, init scaffolding,
and E2E tests are updated to reflect the strict contract.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/docker-publish.yml        |  72 +++++++++
 CHANGELOG.md                                |   1 +
 QUEUE.md                                    | 110 +++----------
 docker/Dockerfile.runtime                   |  42 +++++
 docs/architecture.md                        |   2 +-
 docs/cli.md                                 |   4 +-
 docs/configuration.md                       |   2 +-
 docs/sandboxing.md                          |  40 ++++-
 e2e/lib/common.sh                           |  53 ++++++
 e2e/tests/00_install_and_init.sh            |  35 +---
 e2e/tests/72_docker_run_artifacts.sh        |  39 ++++-
 e2e/tests/73_docker_dockerfile_detection.sh |  43 +++--
 e2e/tests/74_docker_lifecycle.sh            |  11 +-
 src/cli/commands/init.test.ts               |   9 +-
 src/cli/commands/init.ts                    |  33 +---
 src/runtime/docker.test.ts                  |  34 +++-
 src/runtime/docker.ts                       | 168 +++++---------------
 17 files changed, 392 insertions(+), 306 deletions(-)
 create mode 100644 .github/workflows/docker-publish.yml
 create mode 100644 docker/Dockerfile.runtime

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 00000000..29b1c82f
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,72 @@
+name: Publish Docker runtime image
+
+on:
+  push:
+    branches: [nightly]
+    tags: ["v*"]
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: jaiphlang/jaiph-runtime
+
+jobs:
+  publish:
+    name: Build and push jaiph-runtime
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+
+      - name: Install dependencies and build
+        run: |
+          npm ci
+          npm run build
+
+      - name: Create npm tarball
+        run: npm pack --pack-destination docker/
+
+      - name: Rename tarball for Dockerfile
+        run: |
+          cd docker
+          mv jaiph-*.tgz jaiph.tgz
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Determine image tags
+        id: tags
+        run: |
+          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
+            VERSION="${GITHUB_REF_NAME#v}"
+            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${VERSION},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> "$GITHUB_OUTPUT"
+          else
+            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: docker
+          file: docker/Dockerfile.runtime
+          push: true
+          tags: ${{ steps.tags.outputs.tags }}
+          build-args: JAIPH_TARBALL=jaiph.tgz
+
+      - name: Verify pushed image contains jaiph
+        run: |
+          TAG="$(echo '${{ steps.tags.outputs.tags }}' | cut -d',' -f1)"
+          docker run --rm --entrypoint sh "${TAG}" -lc "command -v jaiph && jaiph --version"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd5409d9..dd64875c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:<version>` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:<semver>` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`).
 - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`).
 
 # 0.9.2
diff --git a/QUEUE.md b/QUEUE.md
index 0b34ce01..e6ffada4 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,74 +12,6 @@ Process rules:
 
 ***
 
-## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR #dev-ready
-
-**Goal**
-Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime and must **not** auto-install itself into arbitrary base images. (Today the host uses `npm pack` + `docker build` to install the local package into a derived image; there is no bind-mount of host `dist/`, but that derived-image install path is equally forbidden.) The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph.
-
-Publish an official Jaiph runtime image to **GHCR** and make it the default Docker image:
-
-* tagged releases → `ghcr.io/jaiphlang/jaiph-runtime:<version>`
-* nightly builds → `ghcr.io/jaiphlang/jaiph-runtime:nightly`
-* default `runtime.docker_image` / env default should point at that official image
-
-Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired.
-
-**Required product decision**
-
-1. **Strict requirement** — all Docker images used by Jaiph must already have `jaiph`.
-2. **Official default image** — Jaiph publishes and uses `ghcr.io/jaiphlang/jaiph-runtime`.
-3. **No hidden runtime mutation** — no auto-derived image build, no `npm install -g` of Jaiph during Docker run startup.
-4. **Fast fail** — if the chosen image lacks `jaiph`, Jaiph must fail clearly with an explicit Docker/runtime error.
-
-**Why this task exists**
-
-The codebase currently mixes a generic contract (`jaiph run --raw` inside the container) with a convenience path (stock images without `jaiph`). Both cannot be true without bootstrapping. This task chooses the strict model and removes the second.
-
-**Critical implementation detail (from current `src/runtime/docker.ts`)**
-
-When `imageExplicit === false`, `resolveImage` currently ends in `ensureLocalRuntimeImage`, which **always** targets a derived `jaiph-runtime-auto:*` tag built via `npm pack`, even if the base image already contains `jaiph`. After switching the default to the official GHCR image (or any image that already has `jaiph`), the runtime must **use that image as-is** when `command -v jaiph` succeeds — no auto-derivation. If `jaiph` is missing, fail fast (no fallback build).
-
-**Resolved defaults (no longer open)**
-
-* **Default tag rule**: Release npm builds embed `ghcr.io/jaiphlang/jaiph-runtime:<semver>` matching the package/`jaiph` version. Main/nightly CI artifacts and docs for contributors use the `:nightly` tag; state the rule explicitly in docs.
-* **Cursor / Claude CLIs in the official image**: **Exclude by default** from the minimal `jaiph-runtime` image to keep size and supply chain small; document how to extend a custom image (the managed `.jaiph/Dockerfile` template may remain a fuller example).
-
-**Queue coordination**
-
-Ship published GHCR images before or together with the later queued task “Runtime — default Docker when not CI or unsafe”, which will expect a pullable default image for local users.
-
-**Context**
-
-* Docker runtime: `src/runtime/docker.ts`
-* Docker run path: `src/cli/commands/run.ts`
-* Docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`
-* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh`
-* Managed Dockerfile: `.jaiph/Dockerfile`, `src/cli/commands/init.ts`
-* CI: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml`
-
-**Implementation requirements**
-
-1. **Runtime** — Remove `ensureLocalRuntimeImage` / `buildRuntimeImageFromLocalPackage` / auto-derivation paths. Keep container entry `jaiph run --raw ...`. Preflight: after pull, verify `jaiph` exists in the selected image; if not, error with guidance to use `ghcr.io/jaiphlang/jaiph-runtime` or install Jaiph in a custom image. Preflight is by capability check, not by image name whitelist.
-2. **Default image** — Default becomes the official GHCR runtime image (not `node:20-bookworm`).
-3. **Publishing** — CI/release builds and pushes `ghcr.io/jaiphlang/jaiph-runtime` for release tags and `nightly`. Image includes Node.js, `jaiph`, `fuse-overlayfs` (and other sandbox prereqs per `.jaiph/Dockerfile`), and non-root user if that remains the contract.
-4. **Docs** — Rewrite Docker sections for the strict contract; remove language about auto-derived images and stock bases “just working.”
-5. **Tests** — Update E2E for strict contract; add/keep regression that an image without `jaiph` fails with a clear error.
-
-**Scope note**
-
-Expect changes across more than three files (runtime, CI workflows, init scaffolding, docs, E2E, unit tests). Prefer plain functions and small helpers; `docker.ts` is already large—avoid speculative abstractions.
-
-**Acceptance criteria**
-
-* Default Docker image is `ghcr.io/jaiphlang/jaiph-runtime:*`, not `node:20-bookworm`.
-* Jaiph never auto-builds a derived runtime image at Docker run time.
-* Jaiph never injects Jaiph into the container except by using an image that already contains it (no `npm pack` bootstrap).
-* A custom image without `jaiph` fails fast with a clear actionable error.
-* Official GHCR runtime images are published for release tags and `nightly`.
-* Docs describe the strict contract and official image flow without ambiguity.
-* Unit + E2E coverage prevents regression to bootstrap behavior.
-
 ## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready
 
 **Goal**
@@ -88,44 +20,44 @@ Add optional module-scoped manifest fields in the module-level `config { }` bloc
 
 **Keys (dot-separated, string values)**
 
-- `module.name`
-- `module.version`
-- `module.description`
+* `module.name`
+* `module.version`
+* `module.description`
 
 All optional; omitted keys leave the corresponding field unset.
 
 **Semantics**
 
-- Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it.
-- **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`.
-- Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata).
+* Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it.
+* **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`.
+* Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata).
 
 **Implementation touchpoints**
 
-- `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`.
-- `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`.
-- `src/format/emit.ts` — formatter round-trip for the new keys.
-- `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`).
-- Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations.
-- Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`).
+* `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`.
+* `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`.
+* `src/format/emit.ts` — formatter round-trip for the new keys.
+* `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`).
+* Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations.
+* Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`).
 
 **Non-goals**
 
-- Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation.
+* Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation.
 
 **Queue coordination**
 
-- No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings.
+* No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings.
 
 **Acceptance criteria**
 
-- Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`.
-- Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules).
-- Unit tests cover happy path and workflow rejection; docs and grammar list the keys.
+* Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`.
+* Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules).
+* Unit tests cover happy path and workflow rejection; docs and grammar list the keys.
 
 **Scope note**
 
-- Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers.
+* Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers.
 
 ## Runtime — harden Docker execution environment #dev-ready
 
@@ -154,7 +86,7 @@ Docker mode is the isolation boundary for workflow runs. Harden it: least-privil
 
 **Scope note**
 
-* `docker.ts` is already large (~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle.
+* `docker.ts` is already large (\~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle.
 
 ## Runtime — default Docker when not CI or unsafe #dev-ready
 
@@ -177,6 +109,8 @@ Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker def
 
 ***
 
+## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?)
+
 ## Runtime — credential proxy for Docker mode
 
 **Goal**
@@ -214,7 +148,7 @@ Containers should never hold real API keys. Implement a host-side HTTP proxy (th
 
 **Scope note**
 
-* Target **~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers.
+* Target **\~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers.
 
 ## `jaiph serve` — expose workflows as an MCP server #dev-ready
 
diff --git a/docker/Dockerfile.runtime b/docker/Dockerfile.runtime
new file mode 100644
index 00000000..0e3baad6
--- /dev/null
+++ b/docker/Dockerfile.runtime
@@ -0,0 +1,42 @@
+# Official Jaiph runtime image — ghcr.io/jaiphlang/jaiph-runtime
+#
+# This is the minimal image used by `jaiph run --docker` when no custom image
+# is configured. It contains Node.js, jaiph, and fuse-overlayfs for the
+# copy-on-write workspace overlay.
+#
+# Agent CLIs (Claude Code, cursor-agent) are excluded to keep the image small.
+# To add them, extend this image or use a custom .jaiph/Dockerfile.
+#
+# Tags:
+#   :<semver>  — matches the jaiph npm package version (built on release)
+#   :nightly   — built from the nightly branch on each CI run
+
+FROM node:20-bookworm-slim
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+      bash \
+      curl \
+      git \
+      ca-certificates \
+      fuse-overlayfs \
+      fuse3 \
+      rsync && \
+    rm -rf /var/lib/apt/lists/*
+
+# Non-root user for sandbox safety.
+RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \
+    mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \
+    chown -R jaiph:jaiph /jaiph
+
+# Install jaiph from the local tarball (provided at build time via --build-arg).
+# The tarball is produced by `npm pack` in CI before the docker build step.
+ARG JAIPH_TARBALL=jaiph.tgz
+COPY ${JAIPH_TARBALL} /tmp/jaiph.tgz
+RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz
+
+USER jaiph
+ENV HOME=/home/jaiph
+ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+WORKDIR /jaiph/workspace
diff --git a/docs/architecture.md b/docs/architecture.md
index 787b33cb..936e3022 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -56,7 +56,7 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`*
   - `jaiph format` rewrites `.jh` / `.test.jh` files into canonical style. Pure AST→text emitter; no side-effects beyond file writes.
 
 - **Docker runtime helper (`src/runtime/docker.ts`)**
-  - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits.
+  - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The default image is the official `ghcr.io/jaiphlang/jaiph-runtime` GHCR image; every selected image must already contain `jaiph` (no auto-install or derived-image build at runtime). The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits.
 
 ## Runtime vs CLI responsibilities
 
diff --git a/docs/cli.md b/docs/cli.md
index 04fd28a6..a124946d 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -304,7 +304,7 @@ Creates:
 
 - `.jaiph/.gitignore` — lists `runs` and `tmp`. If the file already exists and does not match this exact list, `jaiph init` exits with a non-zero status.
 - `.jaiph/bootstrap.jh` — canonical bootstrap workflow; made executable. The template uses a triple-quoted multiline prompt body (`prompt """ ... """`) so the generated file parses and compiles as valid Jaiph. It also asks the agent to review/update `.jaiph/Dockerfile` for this repository and ends by logging a summary (`WHAT CHANGED` + `WHY`).
-- `.jaiph/Dockerfile` — canonical Docker sandbox template generated by init. It uses `ubuntu:latest`, installs standard utilities, Node.js LTS, Claude Code CLI, cursor-agent, then installs Jaiph via `curl -fsSL https://jaiph.org/install | bash`. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note.
+- `.jaiph/Dockerfile` — Docker sandbox template that extends the official `ghcr.io/jaiphlang/jaiph-runtime:nightly` image with agent CLIs (Claude Code, cursor-agent). The base image already contains Node.js, jaiph, and `fuse-overlayfs`, so the generated Dockerfile only adds project-specific tooling. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note.
 - `.jaiph/SKILL.md` — copied from the skill file bundled with your Jaiph installation (or from `JAIPH_SKILL_PATH` when set). If no skill file is found, this file is not written and a note is printed.
 
 ## `jaiph install`
@@ -421,7 +421,7 @@ These variables apply to `jaiph run` and workflow execution. Variables marked **
 **Docker sandbox:**
 
 - `JAIPH_DOCKER_ENABLED` — set to `true` to enable Docker sandbox (overrides in-file `runtime.docker_enabled`).
-- `JAIPH_DOCKER_IMAGE` — Docker image for sandbox (overrides in-file `runtime.docker_image`).
+- `JAIPH_DOCKER_IMAGE` — Docker image for sandbox (overrides in-file `runtime.docker_image`). The image must already contain `jaiph`; if it does not, the run fails with `E_DOCKER_NO_JAIPH`. Defaults to the official GHCR runtime image (`ghcr.io/jaiphlang/jaiph-runtime:<version>`).
 - `JAIPH_DOCKER_NETWORK` — Docker network mode (overrides in-file `runtime.docker_network`).
 - `JAIPH_DOCKER_TIMEOUT` — execution timeout in seconds (overrides in-file `runtime.docker_timeout`).
 
diff --git a/docs/configuration.md b/docs/configuration.md
index 7fa5b25e..ee6d7c1a 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -146,7 +146,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r
 | Key | Type | Default | Env variable | Description |
 |-----|------|---------|--------------|-------------|
 | `runtime.docker_enabled` | boolean | `false` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. |
-| `runtime.docker_image` | string | `node:20-bookworm` | `JAIPH_DOCKER_IMAGE` | Image name. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the default. |
+| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the official GHCR image matching the installed jaiph version. |
 | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. |
 | `runtime.workspace` | string[] | `[".:/jaiph/workspace:rw"]` | _(no env override)_ | Mount list. Only settable via in-file config or defaults. |
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 6d1be92d..b43d5080 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -50,7 +50,7 @@ All Docker-related keys live under `runtime.*` in module-level config:
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | `runtime.docker_enabled` | boolean | `false` | Enable Docker sandbox for the run. |
-| `runtime.docker_image` | string | `"node:20-bookworm"` | Base container image. If it lacks `jaiph`, Jaiph builds a thin derived image and installs the current package into it. |
+| `runtime.docker_image` | string | `"ghcr.io/jaiphlang/jaiph-runtime:<version>"` | Container image. Must already contain `jaiph`. Defaults to the official GHCR runtime image matching the installed jaiph version. |
 | `runtime.docker_network` | string | `"default"` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | Max execution time in seconds. `0` disables the timeout. |
 | `runtime.workspace` | string array | `[".:/jaiph/workspace:rw"]` | Mount specifications (see below). |
@@ -92,7 +92,7 @@ Host paths are resolved relative to the workspace root. Each mount is duplicated
   overlay-run.sh      # runtime-generated entrypoint mounted ro from host temp file
 ```
 
-The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run.sh` (a ~10 line bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`. The container runs `/jaiph/overlay-run.sh jaiph run --raw <file>`. The overlay wrapper sets up fuse-overlayfs, then execs the jaiph command. When the selected image does not already contain `jaiph`, the host first builds a thin derived image from that base and installs the current Jaiph package into it, so the runtime path stays generic. No `COPY` in the project Dockerfile is needed -- `overlay-run.sh` is a jaiph runtime artifact.
+The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run.sh` (a ~10 line bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`. The container runs `/jaiph/overlay-run.sh jaiph run --raw <file>`. The overlay wrapper sets up fuse-overlayfs, then execs the jaiph command. The image must already contain `jaiph` — Jaiph does not install itself into the container at runtime. No `COPY` in the project Dockerfile is needed for jaiph runtime files — `overlay-run.sh` is a jaiph runtime artifact.
 
 ### Runtime behavior
 
@@ -112,6 +112,22 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 **Image pull** -- If the image is not present locally, `docker pull` runs automatically. Pull failure produces `E_DOCKER_PULL`.
 
+### Image contract
+
+**Every Docker image used by Jaiph must already contain a working `jaiph` CLI.** Jaiph does not auto-install itself into containers at runtime — no derived image builds, no `npm pack` bootstrap. If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and guidance to use the official image or install jaiph in a custom image.
+
+### Official runtime image
+
+Jaiph publishes official runtime images to GHCR:
+
+| Tag | Built from | Use case |
+|-----|-----------|----------|
+| `ghcr.io/jaiphlang/jaiph-runtime:<semver>` | Release tags (`v*`) | Production / pinned versions |
+| `ghcr.io/jaiphlang/jaiph-runtime:nightly` | `nightly` branch | Contributors and CI |
+| `ghcr.io/jaiphlang/jaiph-runtime:latest` | Latest release tag | Convenience alias |
+
+The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001). It does **not** include agent CLIs (Claude Code, cursor-agent) to keep the image small. To add agent CLIs, extend the official image or use a custom `.jaiph/Dockerfile` (see below).
+
 ### Dockerfile-based image detection
 
 The runtime considers the image explicitly configured when either `runtime.docker_image` appears in the file or `JAIPH_DOCKER_IMAGE` is set in the environment. In that case, `.jaiph/Dockerfile` is not consulted.
@@ -119,11 +135,25 @@ The runtime considers the image explicitly configured when either `runtime.docke
 When the image is not explicit:
 
 1. If `.jaiph/Dockerfile` exists in the workspace root, the runtime builds it, tags the result `jaiph-runtime:latest`, and uses that image. Build failure produces `E_DOCKER_BUILD`.
-2. Otherwise, the default image (`node:20-bookworm`) is pulled if needed.
+2. Otherwise, the default image (`ghcr.io/jaiphlang/jaiph-runtime:<version>`) is pulled if needed.
 
-If the selected base image does not already contain `jaiph`, Jaiph builds a thin derived runtime image from it and installs the current local package with `npm install -g`, then runs the workflow in that derived image.
+After resolving the image (whether from a Dockerfile build, an explicit image, or the default), Jaiph verifies that `jaiph` is available inside the container. If the check fails, the run exits with `E_DOCKER_NO_JAIPH`.
 
-The repository's example `.jaiph/Dockerfile` includes `ubuntu:latest` as a base, Node.js LTS from NodeSource, `fuse-overlayfs`, Claude Code CLI, cursor-agent, and jaiph (installed via the official installer). The image creates a non-root `jaiph` user (UID 10001) and sets `USER jaiph`. Including `fuse-overlayfs` and `jaiph` in the image is still the best path for full sandbox parity and faster startup, but Jaiph can also auto-build a thin derived runtime image when the base image lacks `jaiph`. The Dockerfile does not need to copy any jaiph runtime files -- `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
+The `jaiph init` scaffold generates a `.jaiph/Dockerfile` that extends the official runtime image with agent CLIs (Claude Code, cursor-agent). The Dockerfile does not need to copy any jaiph runtime files — `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
+
+### Extending the official image
+
+To add project-specific tools or agent CLIs to the official image, create a `.jaiph/Dockerfile`:
+
+```dockerfile
+FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
+
+USER root
+RUN npm install -g @anthropic-ai/claude-code
+USER jaiph
+
+# Add project-specific package managers/build tools below.
+```
 
 ### Environment variable forwarding
 
diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh
index 14febd71..d8c157b3 100644
--- a/e2e/lib/common.sh
+++ b/e2e/lib/common.sh
@@ -463,6 +463,59 @@ EOF
   JAIPH_BIN_DIR="${JAIPH_E2E_BIN_DIR}" curl -fsSL "${E2E_SERVER_URL}/install" | bash
 }
 
+E2E_DOCKER_TEST_IMAGE="${JAIPH_E2E_DOCKER_IMAGE:-}"
+E2E_DOCKER_IMAGE_BUILT=0
+
+# Build a local jaiph-e2e-runtime image from the current source tree.
+# Caches the image name in E2E_DOCKER_TEST_IMAGE so it is built at most once.
+e2e::ensure_docker_test_image() {
+  if [[ -n "${E2E_DOCKER_TEST_IMAGE}" ]]; then
+    return 0
+  fi
+  if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
+    return 1
+  fi
+  local tag="jaiph-e2e-runtime:local"
+  if [[ "${E2E_DOCKER_IMAGE_BUILT}" == "1" ]]; then
+    E2E_DOCKER_TEST_IMAGE="${tag}"
+    export JAIPH_E2E_DOCKER_IMAGE="${tag}"
+    return 0
+  fi
+  local context_dir
+  context_dir="$(mktemp -d)"
+  (cd "${E2E_REPO_ROOT}" && npm pack --pack-destination "${context_dir}" >/dev/null 2>&1)
+  local tarball
+  tarball="$(ls "${context_dir}"/jaiph-*.tgz 2>/dev/null | head -1)"
+  if [[ -z "${tarball}" ]]; then
+    rm -rf "${context_dir}"
+    return 1
+  fi
+  mv "${tarball}" "${context_dir}/jaiph.tgz"
+  if [[ -f "${E2E_REPO_ROOT}/docker/Dockerfile.runtime" ]]; then
+    cp "${E2E_REPO_ROOT}/docker/Dockerfile.runtime" "${context_dir}/Dockerfile"
+  else
+    cat > "${context_dir}/Dockerfile" <<'EODOCKERFILE'
+FROM node:20-bookworm-slim
+RUN apt-get update && apt-get install -y --no-install-recommends bash curl git ca-certificates fuse-overlayfs fuse3 rsync && rm -rf /var/lib/apt/lists/*
+RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && chown -R jaiph:jaiph /jaiph
+ARG JAIPH_TARBALL=jaiph.tgz
+COPY ${JAIPH_TARBALL} /tmp/jaiph.tgz
+RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz
+USER jaiph
+ENV HOME=/home/jaiph
+ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+WORKDIR /jaiph/workspace
+EODOCKERFILE
+  fi
+  if docker build -t "${tag}" --build-arg JAIPH_TARBALL=jaiph.tgz "${context_dir}" >/dev/null 2>&1; then
+    E2E_DOCKER_IMAGE_BUILT=1
+    E2E_DOCKER_TEST_IMAGE="${tag}"
+    export JAIPH_E2E_DOCKER_IMAGE="${tag}"
+  fi
+  rm -rf "${context_dir}"
+  [[ -n "${E2E_DOCKER_TEST_IMAGE}" ]]
+}
+
 e2e::prepare_test_env() {
   local test_name="$1"
   e2e::prepare_shared_context
diff --git a/e2e/tests/00_install_and_init.sh b/e2e/tests/00_install_and_init.sh
index a293b47a..7e5dc51c 100644
--- a/e2e/tests/00_install_and_init.sh
+++ b/e2e/tests/00_install_and_init.sh
@@ -74,40 +74,20 @@ e2e::pass "generated bootstrap workflow compiles"
 e2e::assert_file_exists "${TEST_DIR}/.jaiph/Dockerfile" ".jaiph/Dockerfile exists"
 expected_dockerfile="$(mktemp)"
 cat > "${expected_dockerfile}" <<'EOF'
-FROM ubuntu:latest
+# Extends the official jaiph runtime image with agent CLIs for prompt steps.
+# The base image already contains Node.js, jaiph, and fuse-overlayfs.
+# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly.
+FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
 
 # Generated by jaiph init for project sandboxing.
 # Keep this file aligned with your repository's runtime/build/test needs.
 
-# Standard utilities + fuse-overlayfs for CoW sandbox
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-      bash \
-      curl \
-      git \
-      ca-certificates \
-      gnupg \
-      fuse-overlayfs \
-      fuse3 \
-      rsync && \
-    rm -rf /var/lib/apt/lists/*
-
-# Node.js latest LTS (required by jaiph prompt stream helpers)
-RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \
-    apt-get install -y --no-install-recommends nodejs && \
-    rm -rf /var/lib/apt/lists/*
-
-# Non-root user keeps agent CLIs happy in Docker mode.
-RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \
-    mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \
-    chown -R jaiph:jaiph /jaiph
+USER root
 
 # Claude Code CLI (Anthropic)
 RUN npm install -g @anthropic-ai/claude-code
 
 USER jaiph
-ENV HOME=/home/jaiph
-ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
 
 # cursor-agent (Cursor) — install as the runtime user so the binary remains
 # reachable after switching away from root. The installer currently places
@@ -126,16 +106,13 @@ RUN mkdir -p "$HOME/.local/bin" && \
     command -v cursor-agent >/dev/null 2>&1 && \
     rm -f /tmp/install-cursor-agent.sh
 
-# jaiph (official installer: https://jaiph.org/install)
-RUN curl -fsSL https://jaiph.org/install | bash
-
 # Add project-specific package managers/build tools below as needed.
 
 WORKDIR /jaiph/workspace
 EOF
 if ! cmp -s "${TEST_DIR}/.jaiph/Dockerfile" "${expected_dockerfile}"; then
   rm -f "${expected_dockerfile}"
-  e2e::fail "Expected .jaiph/Dockerfile to match init template with jaiph.org installer"
+  e2e::fail "Expected .jaiph/Dockerfile to match init template extending official GHCR image"
 fi
 rm -f "${expected_dockerfile}"
 e2e::pass ".jaiph/Dockerfile matches expected init template content"
diff --git a/e2e/tests/72_docker_run_artifacts.sh b/e2e/tests/72_docker_run_artifacts.sh
index face0baa..c461002d 100755
--- a/e2e/tests/72_docker_run_artifacts.sh
+++ b/e2e/tests/72_docker_run_artifacts.sh
@@ -16,6 +16,13 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
   exit 0
 fi
 
+# Build a local test image with jaiph installed from current source.
+if ! e2e::ensure_docker_test_image; then
+  e2e::section "docker run artifacts (skipped — test image build failed)"
+  e2e::skip "Could not build local Docker test image"
+  exit 0
+fi
+
 e2e::section "docker run artifacts — happy path"
 
 # Given: a simple workflow that produces stdout artifacts
@@ -32,9 +39,9 @@ workflow default() {
 }
 EOF
 
-# When: run with Docker enabled (override the e2e default of JAIPH_DOCKER_ENABLED=false)
-if ! JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/docker_artifacts.jh" >/dev/null 2>&1; then
-  JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/docker_artifacts.jh"
+# When: run with Docker enabled using the E2E test image
+if ! JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/docker_artifacts.jh" >/dev/null 2>&1; then
+  JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/docker_artifacts.jh"
   e2e::fail "docker: jaiph run docker_artifacts.jh failed"
 fi
 
@@ -77,7 +84,7 @@ EOF
 rm -rf "${TEST_DIR}/custom_runs"
 
 # When: run with Docker and relative JAIPH_RUNS_DIR
-(cd "${TEST_DIR}" && JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="custom_runs" jaiph run "${TEST_DIR}/docker_rel_runs.jh" >/dev/null 2>&1)
+(cd "${TEST_DIR}" && JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="custom_runs" jaiph run "${TEST_DIR}/docker_rel_runs.jh" >/dev/null 2>&1)
 
 # Then: artifacts should be under the relative dir on host
 rel_run_dir="$(e2e::run_dir_at "${TEST_DIR}/custom_runs" "docker_rel_runs.jh")"
@@ -105,7 +112,7 @@ abs_runs_dir="${TEST_DIR}/abs_runs"
 rm -rf "${abs_runs_dir}"
 
 # When: run with absolute JAIPH_RUNS_DIR inside workspace
-JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="${abs_runs_dir}" jaiph run "${TEST_DIR}/docker_abs_runs.jh" >/dev/null 2>&1
+JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="${abs_runs_dir}" jaiph run "${TEST_DIR}/docker_abs_runs.jh" >/dev/null 2>&1
 
 # Then: artifacts should be under the absolute path on host
 abs_run_dir="$(e2e::run_dir_at "${abs_runs_dir}" "docker_abs_runs.jh")"
@@ -131,9 +138,29 @@ EOF
 
 # When/Then: absolute path outside workspace should fail
 outside_dir="/tmp/jaiph-outside-workspace-test-$$"
-if JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="${outside_dir}" jaiph run "${TEST_DIR}/docker_outside.jh" >/dev/null 2>&1; then
+if JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="${outside_dir}" jaiph run "${TEST_DIR}/docker_outside.jh" >/dev/null 2>&1; then
   rm -rf "${outside_dir}"
   e2e::fail "docker: absolute JAIPH_RUNS_DIR outside workspace should fail"
 fi
 rm -rf "${outside_dir}"
 e2e::pass "docker: absolute JAIPH_RUNS_DIR outside workspace exits non-zero"
+
+e2e::section "docker run artifacts — image without jaiph fails fast"
+
+# Given: a workflow and a stock image that does NOT contain jaiph
+e2e::file "docker_no_jaiph.jh" <<'EOF'
+script greet_impl = ```
+echo "should not run"
+```
+workflow default() {
+  run greet_impl()
+}
+EOF
+
+# When/Then: using an image without jaiph should fail with E_DOCKER_NO_JAIPH
+error_output=""
+if error_output="$(JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE=node:20-bookworm-slim jaiph run "${TEST_DIR}/docker_no_jaiph.jh" 2>&1)"; then
+  e2e::fail "docker: image without jaiph should fail"
+fi
+# assert_contains: error message varies by image name and guidance text
+e2e::assert_contains "${error_output}" "E_DOCKER_NO_JAIPH" "docker: missing jaiph produces E_DOCKER_NO_JAIPH error"
diff --git a/e2e/tests/73_docker_dockerfile_detection.sh b/e2e/tests/73_docker_dockerfile_detection.sh
index 2bf3fee2..286e9bd1 100644
--- a/e2e/tests/73_docker_dockerfile_detection.sh
+++ b/e2e/tests/73_docker_dockerfile_detection.sh
@@ -16,13 +16,33 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
   exit 0
 fi
 
+# Build the E2E test image (used for explicit-image tests below).
+if ! e2e::ensure_docker_test_image; then
+  e2e::section "docker dockerfile detection (skipped — test image build failed)"
+  e2e::skip "Could not build local Docker test image"
+  exit 0
+fi
+
 e2e::section "docker dockerfile detection — custom Dockerfile builds and runs"
 
-# Given: a .jaiph/Dockerfile that produces a minimal image with a marker file
+# Given: a .jaiph/Dockerfile that produces an image with jaiph AND a marker file.
+# We install jaiph from a local tarball so the custom image satisfies the strict contract.
 mkdir -p "${TEST_DIR}/.jaiph"
-cat > "${TEST_DIR}/.jaiph/Dockerfile" <<'DOCKERFILE'
-FROM node:20-bookworm
+
+(cd "${ROOT_DIR}" && npm pack --pack-destination "${TEST_DIR}/.jaiph" >/dev/null 2>&1)
+tarball_name="$(ls "${TEST_DIR}/.jaiph"/jaiph-*.tgz 2>/dev/null | head -1 | xargs basename)"
+
+cat > "${TEST_DIR}/.jaiph/Dockerfile" <<DOCKERFILE
+FROM node:20-bookworm-slim
+RUN apt-get update && apt-get install -y --no-install-recommends bash fuse-overlayfs fuse3 rsync && rm -rf /var/lib/apt/lists/*
+RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && chown -R jaiph:jaiph /jaiph
+COPY ${tarball_name} /tmp/jaiph.tgz
+RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz
 RUN touch /jaiph-runtime-marker
+USER jaiph
+ENV HOME=/home/jaiph
+ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+WORKDIR /jaiph/workspace
 DOCKERFILE
 
 e2e::file "dockerfile_detect.jh" <<'EOF'
@@ -67,15 +87,15 @@ workflow default() {
 EOF
 
 # When: run with Docker enabled AND explicit image (should skip Dockerfile)
-JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE=node:20-bookworm jaiph run "${TEST_DIR}/dockerfile_skip.jh" >/dev/null 2>&1
+JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/dockerfile_skip.jh" >/dev/null 2>&1
 
-# Then: the marker file should NOT exist (stock pulled image, not custom build)
+# Then: the marker file should NOT exist (E2E test image, not custom build)
 e2e::expect_run_file "dockerfile_skip.jh" "000003-script__check_no_marker_impl.out" "no marker"
 e2e::pass "docker: explicit image skips .jaiph/Dockerfile"
 
-e2e::section "docker dockerfile detection — fallback without Dockerfile"
+e2e::section "docker dockerfile detection — fallback without Dockerfile uses configured image"
 
-# Given: a separate test dir without .jaiph/Dockerfile
+# Given: a separate test dir without .jaiph/Dockerfile, using the E2E test image
 fallback_dir="$(mktemp -d "${JAIPH_E2E_WORK_DIR}/docker_fallback.XXXXXX")"
 cat > "${fallback_dir}/fallback.jh" <<'EOF'
 script greet_impl = ```
@@ -90,14 +110,14 @@ workflow default() {
 }
 EOF
 
-# When: run with Docker enabled but no .jaiph/Dockerfile present
-JAIPH_DOCKER_ENABLED=true JAIPH_WORKSPACE="${fallback_dir}" jaiph run "${fallback_dir}/fallback.jh" >/dev/null 2>&1
+# When: run with Docker enabled and explicit E2E image (no .jaiph/Dockerfile present)
+JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_WORKSPACE="${fallback_dir}" jaiph run "${fallback_dir}/fallback.jh" >/dev/null 2>&1
 
-# Then: should use default Node image (bash + node for JS kernel) and succeed
+# Then: should succeed using the configured image
 fallback_run_dir="$(e2e::run_dir_at "${fallback_dir}/.jaiph/runs" "fallback.jh")"
 fallback_summary="${fallback_run_dir}run_summary.jsonl"
 e2e::assert_file_exists "${fallback_summary}" "docker: fallback run_summary.jsonl exists"
-e2e::pass "docker: falls back to default image without .jaiph/Dockerfile"
+e2e::pass "docker: falls back to configured image without .jaiph/Dockerfile"
 
 e2e::section "docker dockerfile detection — agent env vars are forwarded"
 
@@ -118,6 +138,7 @@ EOF
 
 # When: run with agent env vars set on host
 JAIPH_DOCKER_ENABLED=true \
+  JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" \
   ANTHROPIC_API_KEY="test-key-123" \
   CURSOR_SESSION="test-session-456" \
   jaiph run "${TEST_DIR}/envforward.jh" >/dev/null 2>&1
diff --git a/e2e/tests/74_docker_lifecycle.sh b/e2e/tests/74_docker_lifecycle.sh
index c2dd9dcb..54f2bbee 100755
--- a/e2e/tests/74_docker_lifecycle.sh
+++ b/e2e/tests/74_docker_lifecycle.sh
@@ -16,6 +16,13 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
   exit 0
 fi
 
+# Build a local test image with jaiph installed from current source.
+if ! e2e::ensure_docker_test_image; then
+  e2e::section "docker lifecycle (skipped — test image build failed)"
+  e2e::skip "Could not build local Docker test image"
+  exit 0
+fi
+
 # ---------------------------------------------------------------------------
 # Early container exit / failed startup path
 # ---------------------------------------------------------------------------
@@ -39,7 +46,7 @@ EOF
 
 # When: run with Docker enabled — the container should fail and jaiph should
 # exit promptly (within 30 seconds), not hang in RUNNING.
-if timeout 30 bash -c "JAIPH_DOCKER_ENABLED=true jaiph run '${TEST_DIR}/early_exit.jh' >/dev/null 2>&1"; then
+if timeout 30 bash -c "JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE='${E2E_DOCKER_TEST_IMAGE}' jaiph run '${TEST_DIR}/early_exit.jh' >/dev/null 2>&1"; then
   e2e::fail "docker: early_exit.jh should have failed but exited 0"
 fi
 exit_code=$?
@@ -77,7 +84,7 @@ workflow default() {
 EOF
 
 # When: run with Docker enabled
-if ! timeout 60 bash -c "JAIPH_DOCKER_ENABLED=true jaiph run '${TEST_DIR}/stream_check.jh' >/dev/null 2>&1"; then
+if ! timeout 60 bash -c "JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE='${E2E_DOCKER_TEST_IMAGE}' jaiph run '${TEST_DIR}/stream_check.jh' >/dev/null 2>&1"; then
   e2e::fail "docker: stream_check.jh failed"
 fi
 
diff --git a/src/cli/commands/init.test.ts b/src/cli/commands/init.test.ts
index e411f5db..af7fdd21 100644
--- a/src/cli/commands/init.test.ts
+++ b/src/cli/commands/init.test.ts
@@ -50,17 +50,16 @@ test("init: generated bootstrap uses triple-quoted prompt and parses", () => {
   }
 });
 
-test("init: creates .jaiph/Dockerfile with jaiph installer", () => {
+test("init: creates .jaiph/Dockerfile extending official GHCR image", () => {
   const dir = makeTempDir();
   try {
     assert.equal(runInit([dir]), 0);
     const dockerfilePath = join(dir, ".jaiph", "Dockerfile");
     assert.equal(existsSync(dockerfilePath), true);
     const dockerfile = readFileSync(dockerfilePath, "utf8");
-    assert.equal(dockerfile.includes("FROM ubuntu:latest"), true);
-    assert.equal(dockerfile.includes("ca-certificates"), true);
-    assert.equal(dockerfile.includes("setup_lts.x"), true);
-    assert.equal(dockerfile.includes(JAIPH_INSTALL_COMMAND), true);
+    assert.equal(dockerfile.includes("ghcr.io/jaiphlang/jaiph-runtime"), true);
+    assert.equal(dockerfile.includes("cursor"), true);
+    assert.equal(dockerfile.includes("claude-code"), true);
   } finally {
     rmSync(dir, { recursive: true, force: true });
   }
diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts
index d777bccb..43b16865 100644
--- a/src/cli/commands/init.ts
+++ b/src/cli/commands/init.ts
@@ -37,40 +37,20 @@ workflow default() {
 const JAIPH_DIR_GITIGNORE_TEMPLATE = "runs\ntmp\n";
 const DOCKERFILE_TEMPLATE_MARKER = "# Generated by jaiph init for project sandboxing.";
 const JAIPH_INSTALL_COMMAND = "curl -fsSL https://jaiph.org/install | bash";
-const JAIPH_DOCKERFILE_TEMPLATE = `FROM ubuntu:latest
+const JAIPH_DOCKERFILE_TEMPLATE = `# Extends the official jaiph runtime image with agent CLIs for prompt steps.
+# The base image already contains Node.js, jaiph, and fuse-overlayfs.
+# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly.
+FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
 
 ${DOCKERFILE_TEMPLATE_MARKER}
 # Keep this file aligned with your repository's runtime/build/test needs.
 
-# Standard utilities + fuse-overlayfs for CoW sandbox
-RUN apt-get update && \\
-    apt-get install -y --no-install-recommends \\
-      bash \\
-      curl \\
-      git \\
-      ca-certificates \\
-      gnupg \\
-      fuse-overlayfs \\
-      fuse3 \\
-      rsync && \\
-    rm -rf /var/lib/apt/lists/*
-
-# Node.js latest LTS (required by jaiph prompt stream helpers)
-RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \\
-    apt-get install -y --no-install-recommends nodejs && \\
-    rm -rf /var/lib/apt/lists/*
-
-# Non-root user keeps agent CLIs happy in Docker mode.
-RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \\
-    mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \\
-    chown -R jaiph:jaiph /jaiph
+USER root
 
 # Claude Code CLI (Anthropic)
 RUN npm install -g @anthropic-ai/claude-code
 
 USER jaiph
-ENV HOME=/home/jaiph
-ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
 
 # cursor-agent (Cursor) — install as the runtime user so the binary remains
 # reachable after switching away from root. The installer currently places
@@ -89,9 +69,6 @@ RUN mkdir -p "$HOME/.local/bin" && \\
     command -v cursor-agent >/dev/null 2>&1 && \\
     rm -f /tmp/install-cursor-agent.sh
 
-# jaiph (official installer: https://jaiph.org/install)
-RUN ${JAIPH_INSTALL_COMMAND}
-
 # Add project-specific package managers/build tools below as needed.
 
 WORKDIR /jaiph/workspace
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 9afbe728..c9d4ef7a 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -13,6 +13,8 @@ import {
   writeOverlayScript,
   resolveImage,
   buildImageFromDockerfile,
+  verifyImageHasJaiph,
+  GHCR_IMAGE_REPO,
   type MountSpec,
   type DockerRunConfig,
   type DockerSpawnOptions,
@@ -137,7 +139,7 @@ test("parseMounts: throws when no workspace mount", () => {
 test("resolveDockerConfig: defaults when no in-file and no env", () => {
   const cfg = resolveDockerConfig(undefined, {});
   assert.equal(cfg.enabled, false);
-  assert.equal(cfg.image, "node:20-bookworm");
+  assert.ok(cfg.image.startsWith(GHCR_IMAGE_REPO + ":"), `default image should be GHCR: ${cfg.image}`);
   assert.equal(cfg.network, "default");
   assert.equal(cfg.timeout, 300);
   assert.equal(cfg.mounts.length, 1);
@@ -505,6 +507,14 @@ test("resolveDockerConfig: imageExplicit is true when in-file sets image", () =>
   assert.equal(cfg.image, "alpine:3.19");
 });
 
+// ---------------------------------------------------------------------------
+// GHCR_IMAGE_REPO
+// ---------------------------------------------------------------------------
+
+test("GHCR_IMAGE_REPO: points to official registry", () => {
+  assert.equal(GHCR_IMAGE_REPO, "ghcr.io/jaiphlang/jaiph-runtime");
+});
+
 // ---------------------------------------------------------------------------
 // resolveImage
 // ---------------------------------------------------------------------------
@@ -540,3 +550,25 @@ test("resolveImage: skips Dockerfile when imageExplicit is true", () => {
     rmSync(tmpDir, { recursive: true, force: true });
   }
 });
+
+// ---------------------------------------------------------------------------
+// Strict contract: no auto-build, no npm pack bootstrap
+// ---------------------------------------------------------------------------
+
+test("docker.ts: no auto-build or npm-pack bootstrap code", () => {
+  const src = readFileSync(join(__dirname, "docker.ts"), "utf8");
+  assert.ok(!src.includes("npm pack"), "docker.ts must not contain npm pack");
+  assert.ok(!src.includes("npm install -g"), "docker.ts must not contain npm install -g");
+  assert.ok(!src.includes("jaiph-runtime-auto"), "docker.ts must not reference auto-derived image tag");
+  assert.ok(!src.includes("ensureLocalRuntimeImage"), "docker.ts must not contain ensureLocalRuntimeImage");
+  assert.ok(!src.includes("buildRuntimeImageFromLocalPackage"), "docker.ts must not contain buildRuntimeImageFromLocalPackage");
+});
+
+test("verifyImageHasJaiph: throws E_DOCKER_NO_JAIPH with guidance for missing jaiph", () => {
+  // Unit-test the error message structure without running Docker.
+  // verifyImageHasJaiph uses imageHasJaiph internally which spawns Docker,
+  // so we test the error message format by checking the source contract.
+  const src = readFileSync(join(__dirname, "docker.ts"), "utf8");
+  assert.ok(src.includes("E_DOCKER_NO_JAIPH"), "verifyImageHasJaiph must use E_DOCKER_NO_JAIPH error code");
+  assert.ok(src.includes(GHCR_IMAGE_REPO), "error message must reference official GHCR image");
+});
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 400d6f4a..83541724 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -1,6 +1,5 @@
 import { execFileSync, execSync, spawn, ChildProcess } from "node:child_process";
-import { createHash } from "node:crypto";
-import { existsSync, mkdirSync, mkdtempSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join, resolve, dirname, relative } from "node:path";
 import type { RuntimeConfig } from "../types";
@@ -81,10 +80,25 @@ export function validateMounts(mounts: MountSpec[]): void {
 // Config resolution (env > in-file > defaults)
 // ---------------------------------------------------------------------------
 
+/** Read the package version to derive the default GHCR image tag. */
+function resolveDefaultImageTag(): string {
+  try {
+    const pkgPath = resolve(__dirname, "..", "..", "..", "package.json");
+    const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
+    if (pkg.version && typeof pkg.version === "string") {
+      return pkg.version;
+    }
+  } catch {
+    // Fall through to nightly.
+  }
+  return "nightly";
+}
+
+export const GHCR_IMAGE_REPO = "ghcr.io/jaiphlang/jaiph-runtime";
+
 const DEFAULTS: DockerRunConfig = {
   enabled: false,
-  /** Node + bash; required for JS kernel (run-step-exec) inside the container. */
-  image: "node:20-bookworm",
+  image: `${GHCR_IMAGE_REPO}:${resolveDefaultImageTag()}`,
   imageExplicit: false,
   network: "default",
   timeout: 300,
@@ -175,7 +189,6 @@ export function pullImageIfNeeded(image: string): void {
 // ---------------------------------------------------------------------------
 
 const DOCKERFILE_IMAGE_TAG = "jaiph-runtime:latest";
-const AUTO_RUNTIME_IMAGE_REPO = "jaiph-runtime-auto";
 
 /**
  * Build a Docker image from a Dockerfile and tag it.
@@ -194,26 +207,6 @@ export function buildImageFromDockerfile(dockerfilePath: string, tag: string = D
   return tag;
 }
 
-function installedPackageRoot(): string {
-  return resolve(__dirname, "..", "..", "..");
-}
-
-function autoRuntimeImageTag(baseImage: string, packageRoot: string): string {
-  const packageJsonPath = join(packageRoot, "package.json");
-  const cliPath = join(packageRoot, "dist", "src", "cli.js");
-  const dockerRuntimePath = join(packageRoot, "dist", "src", "runtime", "docker.js");
-  const nodeWorkflowRuntimePath = join(packageRoot, "dist", "src", "runtime", "kernel", "node-workflow-runtime.js");
-  const packageStamp = existsSync(packageJsonPath) ? statSync(packageJsonPath).mtimeMs : 0;
-  const cliStamp = existsSync(cliPath) ? statSync(cliPath).mtimeMs : 0;
-  const dockerRuntimeStamp = existsSync(dockerRuntimePath) ? statSync(dockerRuntimePath).mtimeMs : 0;
-  const nodeWorkflowRuntimeStamp = existsSync(nodeWorkflowRuntimePath) ? statSync(nodeWorkflowRuntimePath).mtimeMs : 0;
-  const digest = createHash("sha256")
-    .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}|${dockerRuntimeStamp}|${nodeWorkflowRuntimeStamp}`)
-    .digest("hex")
-    .slice(0, 12);
-  return `${AUTO_RUNTIME_IMAGE_REPO}:${digest}`;
-}
-
 function imageHasJaiph(image: string): boolean {
   try {
     execFileSync(
@@ -227,103 +220,17 @@ function imageHasJaiph(image: string): boolean {
   }
 }
 
-function imageConfiguredUser(image: string): string | undefined {
-  try {
-    const raw = execFileSync(
-      "docker",
-      ["image", "inspect", image, "--format", "{{json .Config.User}}"],
-      { encoding: "utf8", timeout: 30_000 },
-    ).trim();
-    const parsed = JSON.parse(raw) as string;
-    return parsed.length > 0 ? parsed : undefined;
-  } catch {
-    return undefined;
-  }
-}
-
-function imageHomeDir(image: string): string | undefined {
-  try {
-    const raw = execFileSync(
-      "docker",
-      ["image", "inspect", image, "--format", "{{json .Config.Env}}"],
-      { encoding: "utf8", timeout: 30_000 },
-    ).trim();
-    const envList = JSON.parse(raw) as string[];
-    for (const entry of envList) {
-      if (entry.startsWith("HOME=")) {
-        const value = entry.slice("HOME=".length);
-        return value.length > 0 ? value : undefined;
-      }
-    }
-  } catch {
-    // Fall through.
-  }
-  return undefined;
-}
-
-function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: string, tag: string): string {
-  const contextDir = mkdtempSync(join(tmpdir(), "jaiph-runtime-image-"));
-  try {
-    const tarballName = execFileSync(
-      "npm",
-      ["pack", packageRoot, "--silent", "--pack-destination", contextDir],
-      { cwd: packageRoot, encoding: "utf8", timeout: 300_000 },
-    ).trim().split(/\r?\n/).pop()?.trim();
-    if (!tarballName) {
-      throw new Error("npm pack produced no tarball");
-    }
-    const originalUser = imageConfiguredUser(baseImage);
-    const originalHome = imageHomeDir(baseImage);
-    writeFileSync(
-      join(contextDir, "Dockerfile"),
-      [
-        `FROM ${baseImage}`,
-        `USER root`,
-        `COPY ${tarballName} /tmp/${tarballName}`,
-        `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}` +
-        (originalHome
-          ? ` && JAIPH_NPM_BIN="$(npm prefix -g)/bin/jaiph" && mkdir -p ${originalHome}/.local/bin && ln -sf "$JAIPH_NPM_BIN" ${originalHome}/.local/bin/jaiph`
-          : ""),
-        ...(originalUser ? [`USER ${originalUser}`] : []),
-        "",
-      ].join("\n"),
+/**
+ * Verify that the selected Docker image contains `jaiph`.
+ * Fails fast with an actionable error when the binary is missing.
+ */
+export function verifyImageHasJaiph(image: string): void {
+  if (!imageHasJaiph(image)) {
+    throw new Error(
+      `E_DOCKER_NO_JAIPH the Docker image "${image}" does not contain a jaiph CLI. ` +
+      `Use the official runtime image (${GHCR_IMAGE_REPO}:<version>) or install jaiph ` +
+      `in your custom image. See https://jaiph.org/sandboxing for details.`,
     );
-    execFileSync("docker", ["build", "-t", tag, contextDir], {
-      stdio: "inherit",
-      timeout: 600_000,
-    });
-    return tag;
-  } catch {
-    throw new Error(`E_DOCKER_BUILD failed to build runtime image from base "${baseImage}"`);
-  } finally {
-    rmSync(contextDir, { recursive: true, force: true });
-  }
-}
-
-function ensureLocalRuntimeImage(baseImage: string): string {
-  pullImageIfNeeded(baseImage);
-  const packageRoot = installedPackageRoot();
-  const tag = autoRuntimeImageTag(baseImage, packageRoot);
-  try {
-    execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 });
-    return tag;
-  } catch {
-    return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag);
-  }
-}
-
-function ensureImageHasJaiph(baseImage: string): string {
-  pullImageIfNeeded(baseImage);
-  if (imageHasJaiph(baseImage)) {
-    return baseImage;
-  }
-  const packageRoot = installedPackageRoot();
-  const tag = autoRuntimeImageTag(baseImage, packageRoot);
-  try {
-    execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 });
-    return tag;
-  } catch {
-    return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag);
   }
 }
 
@@ -332,19 +239,26 @@ function ensureImageHasJaiph(baseImage: string): string {
  *
  * When the image was not explicitly configured (`imageExplicit === false`),
  * checks for `.jaiph/Dockerfile` in the workspace root. If present, builds
- * from it and returns the built image tag. Otherwise falls back to the
- * configured (default) image and pulls it if needed.
+ * from it and verifies jaiph is present. Otherwise uses the configured
+ * (default) image — the official GHCR runtime image — and pulls if needed.
+ *
+ * All images are verified to contain `jaiph` before use. If the image
+ * lacks jaiph, the run fails immediately with guidance.
  */
 export function resolveImage(config: DockerRunConfig, workspaceRoot: string): string {
-  let baseImage = config.image;
+  let image = config.image;
   if (!config.imageExplicit) {
     const dockerfilePath = join(workspaceRoot, ".jaiph", "Dockerfile");
     if (existsSync(dockerfilePath)) {
-      baseImage = buildImageFromDockerfile(dockerfilePath);
+      image = buildImageFromDockerfile(dockerfilePath);
+    } else {
+      pullImageIfNeeded(image);
     }
-    return ensureLocalRuntimeImage(baseImage);
+  } else {
+    pullImageIfNeeded(image);
   }
-  return ensureImageHasJaiph(baseImage);
+  verifyImageHasJaiph(image);
+  return image;
 }
 
 // ---------------------------------------------------------------------------

From c5e20c043f8787b33454823b7d6a264d37591d40 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 22:01:26 +0200
Subject: [PATCH 06/38] Feat: Add optional module manifest keys to config block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support module.name, module.version, and module.description as optional
string keys in the module-level config { } block. Values are stored on
WorkflowMetadata.module as descriptive metadata only — they do not
affect agent, run, or runtime behavior. Workflow-level config blocks
reject module.* keys with E_PARSE, consistent with the existing
runtime.* guard. The formatter round-trips all three keys. Unit tests
cover happy path, partial keys, coexistence, round-trip, and
workflow-level rejection. Docs and grammar updated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                     |  1 +
 QUEUE.md                         | 47 ------------------
 docs/configuration.md            | 30 +++++++++++-
 docs/grammar.md                  |  5 +-
 docs/index.html                  |  4 +-
 src/format/emit.ts               | 14 ++++++
 src/parse/metadata.ts            | 21 ++++++++
 src/parse/parse-metadata.test.ts | 82 ++++++++++++++++++++++++++++++++
 src/parse/workflows.ts           |  6 +++
 src/types.ts                     |  1 +
 10 files changed, 159 insertions(+), 52 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dd64875c..ea08d0e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`).
 - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:<version>` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:<semver>` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`).
 - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`).
 
diff --git a/QUEUE.md b/QUEUE.md
index e6ffada4..7c992336 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,53 +12,6 @@ Process rules:
 
 ***
 
-## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready
-
-**Goal**
-
-Add optional module-scoped manifest fields in the module-level `config { }` block so a `.jh` file can declare human-readable **name**, **version**, and **description** without changing agent/run/runtime execution.
-
-**Keys (dot-separated, string values)**
-
-* `module.name`
-* `module.version`
-* `module.description`
-
-All optional; omitted keys leave the corresponding field unset.
-
-**Semantics**
-
-* Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it.
-* **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`.
-* Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata).
-
-**Implementation touchpoints**
-
-* `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`.
-* `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`.
-* `src/format/emit.ts` — formatter round-trip for the new keys.
-* `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`).
-* Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations.
-* Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`).
-
-**Non-goals**
-
-* Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation.
-
-**Queue coordination**
-
-* No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings.
-
-**Acceptance criteria**
-
-* Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`.
-* Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules).
-* Unit tests cover happy path and workflow rejection; docs and grammar list the keys.
-
-**Scope note**
-
-* Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers.
-
 ## Runtime — harden Docker execution environment #dev-ready
 
 **Goal**
diff --git a/docs/configuration.md b/docs/configuration.md
index ee6d7c1a..faa039f6 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -87,7 +87,7 @@ workflow default() {
 **Rules:**
 
 - At most one per workflow; it must be the first non-comment construct in the body. A duplicate is `E_PARSE`: `duplicate config block inside workflow (only one allowed per workflow)`.
-- Only **`agent.*` and `run.*` keys** are allowed. Any `runtime.*` key is `E_PARSE`.
+- Only **`agent.*` and `run.*` keys** are allowed. Any `runtime.*` or `module.*` key is `E_PARSE`.
 - Workflow-level values apply to all steps in that workflow, including `ensure`d rules and scripts called from it. When the workflow finishes, the previous environment is restored.
 
 **Sibling isolation:** Each workflow gets its own clone of the parent environment. Sibling workflows never see each other's config — even when they execute sequentially. If workflow `alpha` sets `agent.backend = "claude"` and workflow `beta` only sets `agent.default_model = "beta-model"`, `beta` still sees the module-level backend (e.g. `"cursor"`), not `alpha`'s.
@@ -137,6 +137,31 @@ These control runtime behavior unrelated to the agent.
 | `run.debug` | boolean | `false` | `JAIPH_DEBUG` | Enables debug tracing for the run. |
 | `run.inbox_parallel` | boolean | `false` | `JAIPH_INBOX_PARALLEL` | Dispatch inbox route targets concurrently. See [Inbox — Parallel dispatch](inbox.md#parallel-dispatch). |
 
+### Module keys
+
+Optional descriptive metadata about the workflow module. These are informational only — they do not affect agent, run, or runtime behavior. Future features (e.g. MCP tool metadata) may consume them.
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `module.name` | string | _(unset)_ | Human-readable name for this module. |
+| `module.version` | string | _(unset)_ | Version string (no validation — any quoted string is accepted). |
+| `module.description` | string | _(unset)_ | Short description of what this module does. |
+
+Module keys can only appear in **module-level** config blocks. Any `module.*` key inside a workflow-level config is `E_PARSE`.
+
+```jh
+config {
+  module.name = "deploy-pipeline"
+  module.version = "2.0.0"
+  module.description = "Production deployment with rollback"
+  agent.backend = "claude"
+}
+
+workflow default() {
+  log "deploying..."
+}
+```
+
 ### Runtime keys (Docker sandbox — beta)
 
 These configure Docker sandboxing. Unlike agent and run keys, runtime keys are resolved by the `jaiph run` CLI at launch — not by the workflow runtime. They can only appear in **module-level** config blocks (not workflow-level).
@@ -303,6 +328,9 @@ Quick reference for all in-file keys and their environment variable equivalents:
 | `runtime.docker_network` | `JAIPH_DOCKER_NETWORK` |
 | `runtime.docker_timeout` | `JAIPH_DOCKER_TIMEOUT` |
 | `runtime.workspace` | _(no env override)_ |
+| `module.name` | _(no env override)_ |
+| `module.version` | _(no env override)_ |
+| `module.description` | _(no env override)_ |
 
 ## Inspecting effective config at runtime
 
diff --git a/docs/grammar.md b/docs/grammar.md
index d3707263..521355d8 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -793,7 +793,8 @@ config_line     = config_key "=" config_value ;
 config_key      = "agent.default_model" | "agent.command" | "agent.backend" | "agent.trusted_workspace"
                 | "agent.cursor_flags" | "agent.claude_flags" | "run.logs_dir" | "run.debug"
                 | "run.inbox_parallel" | "runtime.docker_enabled" | "runtime.docker_image" | "runtime.docker_network"
-                | "runtime.docker_timeout" | "runtime.workspace" ;
+                | "runtime.docker_timeout" | "runtime.workspace"
+                | "module.name" | "module.version" | "module.description" ;
 config_value    = string | "true" | "false" | integer | string_array ;
 integer         = digit { digit } ;
 string_array    = "[" { array_element } "]" ;
@@ -824,7 +825,7 @@ workflow_decl   = [ "export" ] "workflow" IDENT [ "(" param_list ")" ] "{" [ wor
 param_list      = IDENT { "," IDENT } ;  (* identifiers; no duplicates; no reserved keywords *)
 workflow_config = config_block ;
   (* optional per-workflow override; must appear before steps;
-     only agent.* and run.* keys allowed; runtime.* yields E_PARSE *)
+     only agent.* and run.* keys allowed; runtime.* and module.* yield E_PARSE *)
 
 workflow_step   = ensure_stmt | run_stmt | run_catch_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt
                 | const_decl_step | return_stmt
diff --git a/docs/index.html b/docs/index.html
index 921f590d..58771788 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -503,9 +503,9 @@ <h2>Syntax</h2>
                 <h3>Jaiph workflows</h3>
                 <dl class="primitive-list">
                     <dt><code>config { ... }</code></dt>
-                    <dd>Optional runtime options (agent backend/flags, logs, runtime). Allowed at the top level
+                    <dd>Optional runtime options (agent backend/flags, logs, runtime, module metadata). Allowed at the top level
                         (module-wide) and inside individual workflows (per-workflow overrides for <code>agent.*</code>
-                        and <code>run.*</code> keys). Environment variables override config values. See <a
+                        and <code>run.*</code> keys only; <code>runtime.*</code> and <code>module.*</code> are module-level only). Environment variables override config values. See <a
                             href="configuration">Configuration</a>.</dd>
 
                     <dt><code>import "file.jh" as alias</code> &middot; <code>const name = value</code> /
diff --git a/src/format/emit.ts b/src/format/emit.ts
index 20f02a35..484488a4 100644
--- a/src/format/emit.ts
+++ b/src/format/emit.ts
@@ -182,6 +182,15 @@ function emitConfigKeyLines(meta: WorkflowMetadata, key: string, pad: string): s
       ws.push(`${pad}]`);
       return ws;
     }
+    case "module.name":
+      if (meta.module?.name === undefined) return [];
+      return [`${pad}module.name = "${meta.module.name}"`];
+    case "module.version":
+      if (meta.module?.version === undefined) return [];
+      return [`${pad}module.version = "${meta.module.version}"`];
+    case "module.description":
+      if (meta.module?.description === undefined) return [];
+      return [`${pad}module.description = "${meta.module.description}"`];
     default:
       return [];
   }
@@ -230,6 +239,11 @@ function emitConfig(meta: WorkflowMetadata, pad: string): string {
       }
     }
   }
+  if (meta.module) {
+    if (meta.module.name !== undefined) lines.push(`${pad}module.name = "${meta.module.name}"`);
+    if (meta.module.version !== undefined) lines.push(`${pad}module.version = "${meta.module.version}"`);
+    if (meta.module.description !== undefined) lines.push(`${pad}module.description = "${meta.module.description}"`);
+  }
   lines.push("}");
   return lines.join("\n");
 }
diff --git a/src/parse/metadata.ts b/src/parse/metadata.ts
index ed5f9d8f..0b100024 100644
--- a/src/parse/metadata.ts
+++ b/src/parse/metadata.ts
@@ -17,6 +17,9 @@ const ALLOWED_KEYS = new Set([
   "runtime.docker_network",
   "runtime.docker_timeout",
   "runtime.workspace",
+  "module.name",
+  "module.version",
+  "module.description",
 ]);
 
 /** Expected value type for each key that needs type validation. */
@@ -35,6 +38,9 @@ const KEY_TYPES: Record<string, "string" | "boolean" | "number" | "string[]"> =
   "runtime.docker_network": "string",
   "runtime.docker_timeout": "number",
   "runtime.workspace": "string[]",
+  "module.name": "string",
+  "module.version": "string",
+  "module.description": "string",
 };
 
 function parseMetadataValue(filePath: string, rawLine: string, valuePart: string, lineNo: number): string | boolean | number | string[] {
@@ -222,6 +228,21 @@ function assignConfigKey(
       out.runtime = {};
     }
     out.runtime.workspace = value as string[];
+  } else if (key === "module.name") {
+    if (!out.module) {
+      out.module = {};
+    }
+    out.module.name = value as string;
+  } else if (key === "module.version") {
+    if (!out.module) {
+      out.module = {};
+    }
+    out.module.version = value as string;
+  } else if (key === "module.description") {
+    if (!out.module) {
+      out.module = {};
+    }
+    out.module.description = value as string;
   }
 }
 
diff --git a/src/parse/parse-metadata.test.ts b/src/parse/parse-metadata.test.ts
index 8b121adc..e639a202 100644
--- a/src/parse/parse-metadata.test.ts
+++ b/src/parse/parse-metadata.test.ts
@@ -199,6 +199,74 @@ test("parseConfigBlock: fails on type mismatch (number where string expected)",
   );
 });
 
+// ---------------------------------------------------------------------------
+// Module manifest keys (module.name, module.version, module.description)
+// ---------------------------------------------------------------------------
+
+test("parseConfigBlock: parses module.name, module.version, module.description", () => {
+  const lines = [
+    "config {",
+    '  module.name = "my-workflow"',
+    '  module.version = "1.2.3"',
+    '  module.description = "A helpful workflow"',
+    "}",
+  ];
+  const { metadata } = parseConfigBlock("test.jh", lines, 0);
+  assert.equal(metadata.module?.name, "my-workflow");
+  assert.equal(metadata.module?.version, "1.2.3");
+  assert.equal(metadata.module?.description, "A helpful workflow");
+});
+
+test("parseConfigBlock: module keys are optional (partial set)", () => {
+  const lines = [
+    "config {",
+    '  module.name = "only-name"',
+    "}",
+  ];
+  const { metadata } = parseConfigBlock("test.jh", lines, 0);
+  assert.equal(metadata.module?.name, "only-name");
+  assert.equal(metadata.module?.version, undefined);
+  assert.equal(metadata.module?.description, undefined);
+});
+
+test("parseConfigBlock: module keys coexist with other config keys", () => {
+  const lines = [
+    "config {",
+    '  module.name = "proj"',
+    '  agent.backend = "claude"',
+    "}",
+  ];
+  const { metadata } = parseConfigBlock("test.jh", lines, 0);
+  assert.equal(metadata.module?.name, "proj");
+  assert.equal(metadata.agent?.backend, "claude");
+});
+
+test("module keys round-trip through formatter", () => {
+  const src = [
+    'config {',
+    '  module.name = "my-tool"',
+    '  module.version = "0.1.0"',
+    '  module.description = "Does things"',
+    '}',
+    '',
+    'workflow default() {',
+    '  log "ok"',
+    '}',
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  assert.equal(mod.metadata?.module?.name, "my-tool");
+  assert.equal(mod.metadata?.module?.version, "0.1.0");
+  assert.equal(mod.metadata?.module?.description, "Does things");
+
+  // Verify formatter round-trip produces valid source that re-parses identically
+  const { emitModule } = require("../format/emit");
+  const emitted = emitModule(mod);
+  const reparsed = parsejaiph(emitted, "test.jh");
+  assert.equal(reparsed.metadata?.module?.name, "my-tool");
+  assert.equal(reparsed.metadata?.module?.version, "0.1.0");
+  assert.equal(reparsed.metadata?.module?.description, "Does things");
+});
+
 // ---------------------------------------------------------------------------
 // Workflow-level config
 // ---------------------------------------------------------------------------
@@ -264,6 +332,20 @@ test("workflow config: rejects config after steps", () => {
   );
 });
 
+test("workflow config: rejects module.* keys", () => {
+  const src = [
+    "workflow default() {",
+    "  config {",
+    '    module.name = "nope"',
+    "  }",
+    "}",
+  ].join("\n");
+  assert.throws(
+    () => parsejaiph(src, "test.jh"),
+    /module\.\* keys are not allowed in workflow-level config/,
+  );
+});
+
 test("workflow config: rejects runtime.* keys", () => {
   const src = [
     "workflow default() {",
diff --git a/src/parse/workflows.ts b/src/parse/workflows.ts
index 7f5cbccb..f2bcadeb 100644
--- a/src/parse/workflows.ts
+++ b/src/parse/workflows.ts
@@ -142,6 +142,9 @@ export function parseWorkflowBlock(
         if (metadata.runtime) {
           fail(filePath, "runtime.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", lineNo);
         }
+        if (metadata.module) {
+          fail(filePath, "module.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", lineNo);
+        }
         workflow.metadata = metadata;
         continue;
       }
@@ -201,6 +204,9 @@ export function parseWorkflowBlock(
       if (metadata.runtime) {
         fail(filePath, "runtime.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", innerNo);
       }
+      if (metadata.module) {
+        fail(filePath, "module.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", innerNo);
+      }
       workflow.metadata = metadata;
       idx = nextIndex - 1;
       continue;
diff --git a/src/types.ts b/src/types.ts
index 05b48b64..f3696a6c 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -307,6 +307,7 @@ export interface WorkflowMetadata {
   };
   run?: { debug?: boolean; logsDir?: string; inboxParallel?: boolean };
   runtime?: RuntimeConfig;
+  module?: { name?: string; version?: string; description?: string };
   /** Preserves `#` lines and assignment order inside `config { }` (formatter). */
   configBodySequence?: ConfigBodyPart[];
 }

From d9b3b003cf1b2d07dde84c9908f5e93d3a5129cd Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 22:12:21 +0200
Subject: [PATCH 07/38] Feat: Harden Docker execution environment with
 least-privilege defaults

Add mount denylist rejecting dangerous host paths (/, /proc, /sys, /dev,
Docker socket) at validation time with E_VALIDATE_MOUNT. Add environment
variable denylist (SSH_*, GPG_*, AWS_*, GCP_*, AZURE_*, GOOGLE_*, DOCKER_*,
KUBE*, NPM_TOKEN*) preventing host credential leakage into containers.
Launch containers with --cap-drop ALL --cap-add SYS_ADMIN --security-opt
no-new-privileges for least-privilege capability control.

Document threat model in docs/sandboxing.md covering what Docker does and
does not protect against (hooks on host, network egress, agent credential
forwarding, image supply chain, container escapes). Add failure-modes
reference table, expanded network-mode guidance, and env denylist spec.
Unit tests cover all new validation and filtering paths.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md               |   1 +
 QUEUE.md                   |  29 -----------
 docs/sandboxing.md         |  55 +++++++++++++++++++-
 src/runtime/docker.test.ts | 104 +++++++++++++++++++++++++++++++++++++
 src/runtime/docker.ts      |  68 +++++++++++++++++++++++-
 5 files changed, 225 insertions(+), 32 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea08d0e0..50e5fffb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`).
 - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`).
 - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:<version>` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:<semver>` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`).
 - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`).
diff --git a/QUEUE.md b/QUEUE.md
index 7c992336..a1df4264 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,35 +12,6 @@ Process rules:
 
 ***
 
-## Runtime — harden Docker execution environment #dev-ready
-
-**Goal**
-Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage. (Image provenance and the official default image belong to the queued **Docker — strict image contract + GHCR** task; this task only documents or tightens runtime-visible pull/verify behavior as needed, without redefining publishing or the default image.)
-
-**Context**
-
-* Docker runtime: `src/runtime/docker.ts` (`parseMounts` / `validateMounts`, `resolveDockerConfig`, `buildDockerArgs`, `checkDockerAvailable`, `spawnDockerProcess`); CLI integration: `src/cli/commands/run.ts`.
-* Current forwarding: `buildDockerArgs` remaps `JAIPH_WORKSPACE` and `JAIPH_RUNS_DIR`, passes through `JAIPH_*` except `JAIPH_DOCKER_*`, and passes keys prefixed `CURSOR_`, `ANTHROPIC_`, or `CLAUDE_` (see `AGENT_ENV_PREFIXES` in `docker.ts`). Mounts come from resolved `runtime.workspace` plus fixed rw run-dir, ro overlay script, and `--device /dev/fuse`.
-* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`.
-* Config: `runtime.docker_enabled`, `runtime.docker_image`, `runtime.docker_network`, `runtime.docker_timeout`, `runtime.workspace` via `src/config.ts` and metadata parsing.
-
-**Queue coordination**
-
-* Land after or together with **Docker — strict image contract + publish official `jaiph-runtime` images to GHCR** so bootstrap removal and default image changes are settled before deep hardening refactors the same code paths.
-* Land after or together with **Runtime — credential proxy for Docker mode** so any env allowlist/denylist and `docs/sandboxing.md` text stay consistent with placeholder `ANTHROPIC_*` and host-reachable API base URLs (no real secrets in `-e`).
-* The later task **Runtime — default Docker when not CI or unsafe** changes `runtime.docker_enabled` defaults; avoid conflicting precedence — document how hardened Docker behavior interacts with that default once both exist.
-
-**Acceptance criteria**
-
-* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / is not protecting against (including that hooks run on the host).
-* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist aligned with the credential-proxy contract, safer defaults) with unit tests.
-* No silent widen of host access without opt-in.
-* Document network mode behavior (`runtime.docker_network` / `--network`) and failure modes for missing Docker or failed pulls (`E_DOCKER_*`), extending existing patterns where appropriate.
-
-**Scope note**
-
-* `docker.ts` is already large (\~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle.
-
 ## Runtime — default Docker when not CI or unsafe #dev-ready
 
 **Goal**
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index b43d5080..c8648157 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -19,6 +19,25 @@ The runtime executes rules by walking the AST in-process (`NodeWorkflowRuntime.e
 
 `jaiph test` executes tests in-process with `NodeTestRunner` and does not use Docker or a separate rule sandbox.
 
+## Threat model
+
+Docker sandboxing is designed to contain damage from untrusted or semi-trusted workflow scripts. Understanding what it does and does not protect against helps you make informed decisions about when to enable it.
+
+**What Docker protects against:**
+
+- **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The host workspace is mounted read-only; writes go to a tmpfs overlay and are discarded on exit. Only the run-artifacts directory (`/jaiph/run`) persists writes to the host.
+- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (only `SYS_ADMIN` is re-added for fuse-overlayfs) and `--security-opt no-new-privileges` to prevent privilege escalation.
+- **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary.
+- **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`.
+
+**What Docker does NOT protect against:**
+
+- **Hooks run on the host.** Hook commands in `hooks.json` execute on the host CLI process, not inside the container. A malicious hook definition has full host access. Treat `hooks.json` as trusted configuration.
+- **Network egress by default.** Unless `runtime.docker_network` is set to `"none"`, the container has outbound network access via Docker's default bridge. Scripts can reach external services and exfiltrate data through the network.
+- **Agent credential forwarding.** `ANTHROPIC_*`, `CLAUDE_*`, and `CURSOR_*` variables are forwarded into the container so agent-backed workflows function. A malicious script can read these from its environment. When the credential-proxy feature lands, these will be replaced by proxy URLs that do not expose raw API keys.
+- **Image supply chain.** Jaiph verifies that the selected image contains `jaiph` but does not verify image signatures or provenance. Use trusted registries and pin image digests for production workloads.
+- **Container escapes.** Docker is not a security boundary against a determined attacker with kernel exploits. It raises the bar significantly for script-level mischief but is not equivalent to a VM or hardware-level isolation.
+
 ## Docker container isolation
 
 > **Beta.** Docker sandboxing is functional but still under active development. Expect rough edges, breaking changes, and incomplete platform coverage. Feedback is welcome at <https://github.com/jaiphlang/jaiph/issues>.
@@ -80,6 +99,12 @@ Mode must be `ro` or `rw` (otherwise `E_PARSE`). Exactly one mount must target `
 
 Host paths are resolved relative to the workspace root. Each mount is duplicated at the overlay lower-layer path (`/jaiph/workspace-ro/...`) so the overlay wrapper can use it as the read-only source.
 
+The following host paths are rejected at mount validation time with `E_VALIDATE_MOUNT`:
+
+- `/` (host root filesystem)
+- `/var/run/docker.sock`, `/run/docker.sock` (Docker daemon socket)
+- `/proc`, `/sys`, `/dev` (OS internals, including subpaths like `/proc/1/root`)
+
 ### Container layout
 
 ```
@@ -96,7 +121,7 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 ### Runtime behavior
 
-**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--device /dev/fuse` exposes the FUSE device for the overlay. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user <uid>:<gid>` maps the container user to the host user.
+**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL --cap-add SYS_ADMIN` drops all Linux capabilities except `SYS_ADMIN` (required for fuse-overlayfs). `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. `--device /dev/fuse` exposes the FUSE device for the overlay. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user <uid>:<gid>` maps the container user to the host user.
 
 **stdin** -- The `docker run` process is spawned with stdin set to `ignore` to prevent the Docker CLI from blocking on stdin EOF.
 
@@ -106,12 +131,28 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 **Run artifacts** -- The host CLI mounts the resolved host runs root at `/jaiph/run:rw` inside the container. By default this is `.jaiph/runs` under the workspace; a relative `JAIPH_RUNS_DIR` is resolved under the workspace; an absolute `JAIPH_RUNS_DIR` must stay within the workspace or the run fails with `E_DOCKER_RUNS_DIR`. `JAIPH_RUNS_DIR` is set to `/jaiph/run` inside the container, so the runtime writes artifacts directly into the requested host path.
 
-**Network** -- `"default"` omits `--network` (Docker's default bridge). `"none"` passes `--network none`. Any other value is passed through as-is.
+**Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment.
 
 **Timeout** -- When `runtime.docker_timeout` is greater than zero, the CLI sends `SIGTERM` to the container process on overrun, followed by `SIGKILL` after a 5-second grace period. The failure message includes `E_TIMEOUT container execution exceeded timeout`.
 
 **Image pull** -- If the image is not present locally, `docker pull` runs automatically. Pull failure produces `E_DOCKER_PULL`.
 
+### Failure modes
+
+Docker-related errors use `E_DOCKER_*` codes for programmatic detection:
+
+| Error code | Trigger | Behavior |
+|------------|---------|----------|
+| `E_DOCKER_NOT_FOUND` | `docker info` fails (Docker not installed or daemon not running) | Run exits immediately. No fallback to local execution. |
+| `E_DOCKER_PULL` | `docker pull` fails (network error, image not found, auth failure) | Run exits. Check registry access and image name. |
+| `E_DOCKER_BUILD` | `docker build` from `.jaiph/Dockerfile` fails | Run exits. Fix the Dockerfile and retry. |
+| `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. |
+| `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. |
+| `E_VALIDATE_MOUNT` | Mount targets a denied host path (`/`, `/proc`, docker socket, etc.) | Run exits before container launch. |
+| `E_TIMEOUT` | Container exceeds `runtime.docker_timeout` seconds | Container receives SIGTERM, then SIGKILL after 5s grace period. |
+
+All failures are deterministic and produce non-zero exit codes. There is no silent fallback from Docker to local execution.
+
 ### Image contract
 
 **Every Docker image used by Jaiph must already contain a working `jaiph` CLI.** Jaiph does not auto-install itself into containers at runtime — no derived image builds, no `npm pack` bootstrap. If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and guidance to use the official image or install jaiph in a custom image.
@@ -163,6 +204,16 @@ All `JAIPH_*` variables from the host are forwarded into the container, **except
 - `ANTHROPIC_*`
 - `CLAUDE_*`
 
+The following prefixes are **never** forwarded, even if present on the host:
+
+- `SSH_*`, `GPG_*` -- authentication agent sockets and signing keys
+- `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*` -- cloud provider credentials
+- `DOCKER_*` -- Docker daemon configuration (prevents container-in-container)
+- `KUBE*` -- Kubernetes configuration
+- `NPM_TOKEN*` -- package registry credentials
+
+This denylist is enforced in `buildDockerArgs` and cannot be overridden. If a workflow needs cloud credentials inside the container, pass them explicitly through `JAIPH_*`-prefixed variables or use a credential proxy.
+
 ### Example
 
 A workflow with Docker sandboxing enabled and an extra read-only mount for a `config` directory (using the shorthand form):
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index c9d4ef7a..e706697d 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -4,6 +4,7 @@ import {
   parseMount,
   parseMounts,
   validateMounts,
+  validateMountHostPath,
   resolveDockerConfig,
   buildDockerArgs,
   remapDockerEnv,
@@ -14,6 +15,8 @@ import {
   resolveImage,
   buildImageFromDockerfile,
   verifyImageHasJaiph,
+  isEnvDenied,
+  ENV_DENYLIST_PREFIXES,
   GHCR_IMAGE_REPO,
   type MountSpec,
   type DockerRunConfig,
@@ -572,3 +575,104 @@ test("verifyImageHasJaiph: throws E_DOCKER_NO_JAIPH with guidance for missing ja
   assert.ok(src.includes("E_DOCKER_NO_JAIPH"), "verifyImageHasJaiph must use E_DOCKER_NO_JAIPH error code");
   assert.ok(src.includes(GHCR_IMAGE_REPO), "error message must reference official GHCR image");
 });
+
+// ---------------------------------------------------------------------------
+// validateMountHostPath: dangerous mount rejection
+// ---------------------------------------------------------------------------
+
+test("validateMountHostPath: allows normal workspace path", () => {
+  assert.doesNotThrow(() => validateMountHostPath("/home/user/project"));
+});
+
+test("validateMountHostPath: rejects root filesystem", () => {
+  assert.throws(() => validateMountHostPath("/"), /E_VALIDATE_MOUNT.*root filesystem/);
+});
+
+test("validateMountHostPath: rejects docker socket", () => {
+  assert.throws(() => validateMountHostPath("/var/run/docker.sock"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+test("validateMountHostPath: rejects /proc", () => {
+  assert.throws(() => validateMountHostPath("/proc"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+test("validateMountHostPath: rejects /proc subpath", () => {
+  assert.throws(() => validateMountHostPath("/proc/1/root"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+test("validateMountHostPath: rejects /sys", () => {
+  assert.throws(() => validateMountHostPath("/sys"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+test("validateMountHostPath: rejects /dev", () => {
+  assert.throws(() => validateMountHostPath("/dev"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+test("validateMountHostPath: rejects /run/docker.sock", () => {
+  assert.throws(() => validateMountHostPath("/run/docker.sock"), /E_VALIDATE_MOUNT.*denied/);
+});
+
+// ---------------------------------------------------------------------------
+// isEnvDenied: env denylist
+// ---------------------------------------------------------------------------
+
+test("isEnvDenied: blocks SSH_ vars", () => {
+  assert.equal(isEnvDenied("SSH_AUTH_SOCK"), true);
+});
+
+test("isEnvDenied: blocks AWS_ vars", () => {
+  assert.equal(isEnvDenied("AWS_SECRET_ACCESS_KEY"), true);
+});
+
+test("isEnvDenied: blocks DOCKER_ vars", () => {
+  assert.equal(isEnvDenied("DOCKER_HOST"), true);
+});
+
+test("isEnvDenied: blocks GPG_ vars", () => {
+  assert.equal(isEnvDenied("GPG_AGENT_INFO"), true);
+});
+
+test("isEnvDenied: blocks KUBE vars", () => {
+  assert.equal(isEnvDenied("KUBECONFIG"), true);
+});
+
+test("isEnvDenied: allows JAIPH_ vars", () => {
+  assert.equal(isEnvDenied("JAIPH_DEBUG"), false);
+});
+
+test("isEnvDenied: allows ANTHROPIC_ vars", () => {
+  assert.equal(isEnvDenied("ANTHROPIC_API_KEY"), false);
+});
+
+test("buildDockerArgs: denied env vars are not forwarded", () => {
+  const opts = defaultOpts({
+    env: {
+      JAIPH_DEBUG: "true",
+      SSH_AUTH_SOCK: "/tmp/ssh.sock",
+      AWS_SECRET_ACCESS_KEY: "secret",
+      DOCKER_HOST: "unix:///var/run/docker.sock",
+    },
+  });
+  const args = buildDockerArgs(opts, TEST_OVERLAY);
+  assert.ok(args.includes("JAIPH_DEBUG=true"), "allowed JAIPH_ var forwarded");
+  assert.ok(!args.some((a) => a.includes("SSH_AUTH_SOCK")), "SSH_ denied");
+  assert.ok(!args.some((a) => a.includes("AWS_SECRET_ACCESS_KEY")), "AWS_ denied");
+  assert.ok(!args.some((a) => a.includes("DOCKER_HOST")), "DOCKER_ denied");
+});
+
+// ---------------------------------------------------------------------------
+// buildDockerArgs: security flags
+// ---------------------------------------------------------------------------
+
+test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privileges", () => {
+  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+  const capDropIdx = args.indexOf("--cap-drop");
+  assert.ok(capDropIdx >= 0, "--cap-drop present");
+  assert.equal(args[capDropIdx + 1], "ALL");
+  const capAddIdx = args.indexOf("--cap-add");
+  assert.ok(capAddIdx >= 0, "--cap-add present");
+  assert.equal(args[capAddIdx + 1], "SYS_ADMIN");
+  const secOptIdx = args.indexOf("--security-opt");
+  assert.ok(secOptIdx >= 0, "--security-opt present");
+  assert.equal(args[secOptIdx + 1], "no-new-privileges");
+});
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 83541724..06f6df5d 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -61,10 +61,44 @@ export function parseMounts(specs: string[]): MountSpec[] {
   return mounts;
 }
 
+/**
+ * Host paths that must never be bind-mounted into a container.
+ * Prevents accidental exposure of the Docker daemon, OS internals, or
+ * the entire root filesystem.
+ */
+const DENIED_HOST_PATHS = [
+  "/var/run/docker.sock",
+  "/run/docker.sock",
+  "/proc",
+  "/sys",
+  "/dev",
+] as const;
+
+/**
+ * Validate a single mount's host path against the denylist.
+ * Rejects exact matches and child paths (e.g. `/proc/1/root`).
+ */
+export function validateMountHostPath(hostAbsPath: string): void {
+  const normalized = hostAbsPath.replace(/\/+$/, "");
+  if (normalized === "" || normalized === "/") {
+    throw new Error(
+      `E_VALIDATE_MOUNT refusing to mount the host root filesystem ("/") into the container`,
+    );
+  }
+  for (const denied of DENIED_HOST_PATHS) {
+    if (normalized === denied || normalized.startsWith(denied + "/")) {
+      throw new Error(
+        `E_VALIDATE_MOUNT refusing to mount denied host path "${hostAbsPath}" into the container`,
+      );
+    }
+  }
+}
+
 /**
  * Validate mount list: exactly one mount must target `/jaiph/workspace`.
+ * Also rejects dangerous host paths.
  */
-export function validateMounts(mounts: MountSpec[]): void {
+export function validateMounts(mounts: MountSpec[], workspaceRoot?: string): void {
   const workspaceMounts = mounts.filter(
     (m) => m.containerPath === "/jaiph/workspace" || m.containerPath.replace(/\/+$/, "") === "/jaiph/workspace",
   );
@@ -74,6 +108,10 @@ export function validateMounts(mounts: MountSpec[]): void {
   if (workspaceMounts.length > 1) {
     throw new Error("E_VALIDATE exactly one mount must target /jaiph/workspace, found multiple");
   }
+  for (const mount of mounts) {
+    const hostAbs = workspaceRoot ? resolve(workspaceRoot, mount.hostPath) : resolve(mount.hostPath);
+    validateMountHostPath(hostAbs);
+  }
 }
 
 // ---------------------------------------------------------------------------
@@ -363,6 +401,27 @@ export const CONTAINER_WORKSPACE = "/jaiph/workspace";
 export const CONTAINER_RUN_DIR = "/jaiph/run";
 const AGENT_ENV_PREFIXES = ["CURSOR_", "ANTHROPIC_", "CLAUDE_"] as const;
 
+/**
+ * Environment variable prefixes that are never forwarded into the container.
+ * Prevents leaking host credentials that aren't part of the explicit allowlist.
+ */
+export const ENV_DENYLIST_PREFIXES = [
+  "SSH_",
+  "GPG_",
+  "AWS_",
+  "GCP_",
+  "AZURE_",
+  "GOOGLE_",
+  "DOCKER_",
+  "KUBE",
+  "NPM_TOKEN",
+] as const;
+
+/** Returns true if `key` matches any denied prefix. */
+export function isEnvDenied(key: string): boolean {
+  return ENV_DENYLIST_PREFIXES.some((prefix) => key.startsWith(prefix));
+}
+
 /** Resolve the host run-artifacts root for Docker-backed runs. */
 export function resolveDockerHostRunsRoot(
   workspaceRoot: string,
@@ -426,6 +485,11 @@ export function overlayMountPath(containerPath: string): string {
 export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: string): string[] {
   const args: string[] = ["run", "--rm"];
 
+  // Least-privilege: drop all capabilities, re-add only SYS_ADMIN for fuse-overlayfs
+  args.push("--cap-drop", "ALL");
+  args.push("--cap-add", "SYS_ADMIN");
+  args.push("--security-opt", "no-new-privileges");
+
   args.push("--device", "/dev/fuse");
 
   if (process.platform === "linux") {
@@ -445,6 +509,7 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str
   // Workspace inputs: mounted only at the overlay lower-layer path.
   for (const mount of opts.config.mounts) {
     const hostAbs = resolve(opts.workspaceRoot, mount.hostPath);
+    validateMountHostPath(hostAbs);
     args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`);
   }
 
@@ -459,6 +524,7 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str
 
   for (const [key, value] of Object.entries(containerEnv)) {
     if (value === undefined) continue;
+    if (isEnvDenied(key)) continue;
     if (key.startsWith("JAIPH_") && !key.startsWith("JAIPH_DOCKER_")) {
       args.push("-e", `${key}=${value}`);
     }

From e08f584e384156fd0d8ed7b0dcb45cd0c93a48e0 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Fri, 17 Apr 2026 22:21:39 +0200
Subject: [PATCH 08/38] Feat: Default Docker sandboxing to on for local
 development

Docker is now enabled by default when neither CI=true nor
JAIPH_UNSAFE=true is set in the environment. This makes sandboxed
execution the safe default for local development while keeping Docker
off in CI (where it is typically unavailable or redundant) and when
the user explicitly opts out via JAIPH_UNSAFE=true.

Precedence: JAIPH_DOCKER_ENABLED env > in-file runtime.docker_enabled
> CI/unsafe default rule. The test harness and E2E runner set
JAIPH_UNSAFE=true so existing tests continue to run on host.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md               |  1 +
 QUEUE.md                   | 21 ---------------------
 docs/configuration.md      |  6 +++---
 docs/index.html            |  5 +++--
 docs/sandboxing.md         | 24 ++++++++++++++++++------
 e2e/test_all.sh            |  1 +
 package.json               |  2 +-
 src/runtime/docker.test.ts | 24 ++++++++++++++++++++++--
 src/runtime/docker.ts      | 14 ++++++++++----
 9 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50e5fffb..052f85e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`).
 - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`).
 - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`).
 - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:<version>` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:<semver>` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`).
diff --git a/QUEUE.md b/QUEUE.md
index a1df4264..102f91b6 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,27 +12,6 @@ Process rules:
 
 ***
 
-## Runtime — default Docker when not CI or unsafe #dev-ready
-
-**Goal**
-When the user has not opted into "unsafe" local execution, workflows should run in Docker by default. **Default `runtime.docker_enabled` to on** only when **neither** `CI=true` **nor** `JAIPH_UNSAFE=true` is set in the environment. If either is set, default Docker to **off** unless explicitly overridden via `runtime.docker_enabled` / `JAIPH_DOCKER_ENABLED`.
-
-Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted; document it next to `CI`.
-
-**Context**
-
-* Config resolution: `src/config.ts` — `resolveDockerConfig()` or equivalent; where `runtime.docker_enabled` default is determined.
-* Env precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then CI / unsafe default rule.
-* E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh` — may need env setup adjustments.
-
-**Acceptance criteria**
-
-* `resolveDockerConfig()` (and any CLI preflight messaging) implements the precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then apply CI / unsafe default rule.
-* Unit tests for env combinations: plain local → Docker default on; `CI=true` → default off; `JAIPH_UNSAFE=true` → default off; both unset with explicit `JAIPH_DOCKER_ENABLED=false` → off.
-* `CHANGELOG` + sandboxing / configuration docs updated.
-
-***
-
 ## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?)
 
 ## Runtime — credential proxy for Docker mode
diff --git a/docs/configuration.md b/docs/configuration.md
index faa039f6..5a1b2891 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -17,7 +17,7 @@ All execution goes through the Node workflow runtime (`NodeWorkflowRuntime`), wh
 
 Jaiph provides three configuration mechanisms. When the same key is set in more than one place, the highest-priority source wins:
 
-1. **Environment variables** — highest priority. `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`, and `JAIPH_DOCKER_*`.
+1. **Environment variables** — highest priority. `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`, `JAIPH_DOCKER_*`, and `JAIPH_UNSAFE`.
 2. **In-file `config { ... }` blocks** — at module scope and optionally inside a `workflow` body.
 3. **Built-in defaults** — lowest priority, used when nothing else sets a value.
 
@@ -170,7 +170,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r
 
 | Key | Type | Default | Env variable | Description |
 |-----|------|---------|--------------|-------------|
-| `runtime.docker_enabled` | boolean | `false` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. |
+| `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. |
 | `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the official GHCR image matching the installed jaiph version. |
 | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. |
@@ -187,7 +187,7 @@ For **agent and run keys**, resolution order (highest wins):
 3. **Module-level `config`** — applies to workflows that don't define their own block.
 4. **Built-in defaults.**
 
-For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > defaults**. Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys.
+For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > CI/unsafe default rule**. The default rule enables Docker when neither `CI=true` nor `JAIPH_UNSAFE=true` is set (see [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker)). Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys.
 
 ### Locked variables
 
diff --git a/docs/index.html b/docs/index.html
index 58771788..5c7793bb 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -462,8 +462,9 @@ <h3>Language</h3>
                 </p>
 
                 <h3>Runtime</h3>
-                <p><strong>Docker sandboxing.</strong> Enable isolated execution with Docker for stronger containment of
-                    agent and shell actions. Configure in <code>config { runtime.* }</code>. See <a
+                <p><strong>Docker sandboxing.</strong> Workflows run inside Docker by default for local development, providing
+                    filesystem and process isolation for agent and shell actions. Disable with <code>JAIPH_UNSAFE=true</code>
+                    or <code>runtime.docker_enabled = false</code>. See <a
                         href="sandboxing">Sandboxing</a>.</p>
                 <p><strong>Hooks.</strong> Attach shell automation to workflow and step lifecycle events via
                     <code>~/.jaiph/hooks.json</code> or <code>&lt;project&gt;/.jaiph/hooks.json</code>. See <a
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index c8648157..6445008e 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -48,17 +48,29 @@ The host workspace is mounted **read-only** to prevent bind-mount deadlocks with
 
 ### Enabling Docker
 
-Docker sandboxing is opt-in. Set `runtime.docker_enabled = true` in a module-level `config` block:
+Docker sandboxing is **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments (`CI=true`) or when the user explicitly opts out with `JAIPH_UNSAFE=true`, the default flips to `false`.
+
+To disable Docker for a local run without setting an environment variable, set `runtime.docker_enabled = false` in a module-level `config` block:
 
 ```jh
 config {
-  runtime.docker_enabled = true
+  runtime.docker_enabled = false
 }
 ```
 
 `runtime.*` keys belong only in module-level config. Placing them in a workflow-level `config` block is a parse error.
 
-The environment variable `JAIPH_DOCKER_ENABLED` overrides the in-file setting when set: only the literal string `"true"` enables Docker; any other value disables it. When unset, the in-file value (default `false`) applies.
+The environment variable `JAIPH_DOCKER_ENABLED` overrides both the in-file setting and the CI/unsafe default when set: only the literal string `"true"` enables Docker; any other value disables it. `JAIPH_UNSAFE=true` is the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted.
+
+**Default rule (when no explicit `JAIPH_DOCKER_ENABLED` or in-file `runtime.docker_enabled` is set):**
+
+| Environment | Default |
+|-------------|---------|
+| Plain local (no `CI`, no `JAIPH_UNSAFE`) | Docker **on** |
+| `CI=true` | Docker **off** |
+| `JAIPH_UNSAFE=true` | Docker **off** |
+
+Explicit overrides (`JAIPH_DOCKER_ENABLED` env or in-file `runtime.docker_enabled`) always take precedence over the default rule.
 
 If Docker is enabled but `docker info` fails, the run exits with `E_DOCKER_NOT_FOUND` -- there is no silent fallback to local execution.
 
@@ -68,7 +80,7 @@ All Docker-related keys live under `runtime.*` in module-level config:
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| `runtime.docker_enabled` | boolean | `false` | Enable Docker sandbox for the run. |
+| `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | Enable Docker sandbox for the run. |
 | `runtime.docker_image` | string | `"ghcr.io/jaiphlang/jaiph-runtime:<version>"` | Container image. Must already contain `jaiph`. Defaults to the official GHCR runtime image matching the installed jaiph version. |
 | `runtime.docker_network` | string | `"default"` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | Max execution time in seconds. `0` disables the timeout. |
@@ -78,9 +90,9 @@ Each key is type-checked at parse time. Unknown keys produce `E_PARSE`.
 
 #### Environment variable overrides
 
-Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Workspace mounts are not overridable via environment.
+Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Additionally, `CI` and `JAIPH_UNSAFE` affect the default for `runtime.docker_enabled` (see [Enabling Docker](#enabling-docker)). Workspace mounts are not overridable via environment.
 
-Precedence: environment variable > in-file config > default.
+Precedence: `JAIPH_DOCKER_ENABLED` env > in-file config > CI/unsafe default rule.
 
 If `JAIPH_DOCKER_TIMEOUT` is set but not a valid integer, the default (`300`) is used.
 
diff --git a/e2e/test_all.sh b/e2e/test_all.sh
index 63dd8f66..b3df284b 100755
--- a/e2e/test_all.sh
+++ b/e2e/test_all.sh
@@ -98,6 +98,7 @@ for script in "${TEST_SCRIPTS[@]}"; do
 
   e2e::section "Running ${script_name}"
   if JAIPH_E2E_SKIP_INSTALL=1 \
+    JAIPH_UNSAFE="${JAIPH_UNSAFE:-true}" \
     JAIPH_E2E_TMP_DIR="${JAIPH_E2E_TMP_DIR:-}" \
     JAIPH_E2E_BIN_DIR="${JAIPH_E2E_BIN_DIR}" \
     JAIPH_E2E_WORK_DIR="${JAIPH_E2E_WORK_DIR}" \
diff --git a/package.json b/package.json
index ff3cda41..c342a059 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,7 @@
     "build:standalone": "npm run build && node -e \"const fs=require('node:fs'); fs.cpSync('dist/src/runtime','dist/runtime',{recursive:true});\" && bun build --compile ./src/cli.ts --outfile ./dist/jaiph",
     "test:compiler": "npm run build && node --test dist/src/compiler-test-runner.js",
     "test:golden-ast": "npm run build && node --test dist/src/golden-ast-runner.js",
-    "test": "npm run clean && npm run build && NODE_OPTIONS='--max-old-space-size=32768 --enable-source-maps' node --test dist/test/*.test.js $(find dist/src -name '*.test.js' -o -name '*.acceptance.test.js') dist/src/compiler-test-runner.js dist/src/golden-ast-runner.js",
+    "test": "npm run clean && npm run build && JAIPH_UNSAFE=true NODE_OPTIONS='--max-old-space-size=32768 --enable-source-maps' node --test dist/test/*.test.js $(find dist/src -name '*.test.js' -o -name '*.acceptance.test.js') dist/src/compiler-test-runner.js dist/src/golden-ast-runner.js",
     "test:acceptance:compiler": "npm run build && node --test $(find dist/src -name '*.acceptance.test.js')",
     "test:acceptance:runtime": "bash ./e2e/test_all.sh",
     "test:acceptance": "npm run test:acceptance:compiler && npm run test:acceptance:runtime",
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index e706697d..473a0803 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -139,9 +139,9 @@ test("parseMounts: throws when no workspace mount", () => {
 // resolveDockerConfig
 // ---------------------------------------------------------------------------
 
-test("resolveDockerConfig: defaults when no in-file and no env", () => {
+test("resolveDockerConfig: defaults when no in-file and no env — Docker on", () => {
   const cfg = resolveDockerConfig(undefined, {});
-  assert.equal(cfg.enabled, false);
+  assert.equal(cfg.enabled, true);
   assert.ok(cfg.image.startsWith(GHCR_IMAGE_REPO + ":"), `default image should be GHCR: ${cfg.image}`);
   assert.equal(cfg.network, "default");
   assert.equal(cfg.timeout, 300);
@@ -183,6 +183,26 @@ test("resolveDockerConfig: env JAIPH_DOCKER_ENABLED=true overrides CI default",
   assert.equal(cfg.enabled, true);
 });
 
+test("resolveDockerConfig: JAIPH_UNSAFE=true disables Docker by default", () => {
+  const cfg = resolveDockerConfig(undefined, { JAIPH_UNSAFE: "true" });
+  assert.equal(cfg.enabled, false);
+});
+
+test("resolveDockerConfig: JAIPH_UNSAFE=true with in-file override enables Docker", () => {
+  const cfg = resolveDockerConfig({ dockerEnabled: true }, { JAIPH_UNSAFE: "true" });
+  assert.equal(cfg.enabled, true);
+});
+
+test("resolveDockerConfig: JAIPH_UNSAFE=true with env JAIPH_DOCKER_ENABLED=true enables Docker", () => {
+  const cfg = resolveDockerConfig(undefined, { JAIPH_UNSAFE: "true", JAIPH_DOCKER_ENABLED: "true" });
+  assert.equal(cfg.enabled, true);
+});
+
+test("resolveDockerConfig: both CI and JAIPH_UNSAFE unset with explicit JAIPH_DOCKER_ENABLED=false disables", () => {
+  const cfg = resolveDockerConfig(undefined, { JAIPH_DOCKER_ENABLED: "false" });
+  assert.equal(cfg.enabled, false);
+});
+
 test("resolveDockerConfig: network env override", () => {
   const cfg = resolveDockerConfig(undefined, { JAIPH_DOCKER_NETWORK: "none" });
   assert.equal(cfg.network, "none");
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 06f6df5d..32cce1c5 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -145,21 +145,27 @@ const DEFAULTS: DockerRunConfig = {
 
 /**
  * Resolve effective Docker config.
- * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > defaults.
- * Docker is disabled by default; opt in via config or env.
+ * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > CI/unsafe default rule.
+ *
+ * Default rule (when no explicit override is set):
+ *  - `CI=true` or `JAIPH_UNSAFE=true` → Docker off
+ *  - Otherwise → Docker on
  */
 export function resolveDockerConfig(
   inFile: RuntimeConfig | undefined,
   env: Record<string, string | undefined>,
 ): DockerRunConfig {
-  // enabled: env > in-file > default (false)
+  // enabled: env JAIPH_DOCKER_ENABLED > in-file > CI/unsafe default rule
   let enabled: boolean;
   if (env.JAIPH_DOCKER_ENABLED !== undefined) {
     enabled = env.JAIPH_DOCKER_ENABLED === "true";
   } else if (inFile?.dockerEnabled !== undefined) {
     enabled = inFile.dockerEnabled;
   } else {
-    enabled = DEFAULTS.enabled;
+    // Default: Docker on unless CI or unsafe mode is active
+    const isCI = env.CI === "true";
+    const isUnsafe = env.JAIPH_UNSAFE === "true";
+    enabled = !(isCI || isUnsafe);
   }
 
   // image: env > in-file > default

From 6a0e25330d2186ae5b8a8ed34af03bd8a450385d Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Sat, 18 Apr 2026 09:30:50 +0200
Subject: [PATCH 09/38] Feat: Export workspace patch on Docker run teardown

Docker runs enforce an immutability contract: the host workspace is
bind-mounted read-only and /jaiph/workspace is a sandbox-local
copy-on-write layer discarded on exit. The only persistence channel
to the host is the run-artifacts directory. During teardown, the
runtime now automatically exports a workspace.patch file (git diff
--binary) into the run directory so sandbox edits can be reviewed or
applied on the host. Patch export is best-effort, owned by the
runtime (not workflow logic), and runs regardless of workflow exit
status. When there are no changes, the file is omitted.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                |  1 +
 QUEUE.md                                    | 27 +++++-----
 docs/architecture.md                        |  1 +
 docs/artifacts.md                           |  2 +
 docs/sandboxing.md                          |  4 ++
 src/runtime/docker.test.ts                  | 60 +++++++++++++++++++++
 src/runtime/docker.ts                       | 38 +++++++++++++
 src/runtime/kernel/node-workflow-runtime.ts |  9 ++++
 8 files changed, 128 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 052f85e9..ccd2e664 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`).
 - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`).
 - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`).
diff --git a/QUEUE.md b/QUEUE.md
index 102f91b6..231e8c35 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -12,46 +12,45 @@ Process rules:
 
 ***
 
-## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?)
-
-## Runtime — credential proxy for Docker mode
+## Runtime — credential proxy for Docker mode #dev-ready
 
 **Goal**
 Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. The workload in the container never receives the real secret.
 
 **Design**
 
-1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:<ephemeral-port>`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration, stream the response back (including SSE).
-2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=<placeholder>` and `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>` (or `http://<host-gateway>:<port>`). Never pass the real key in `-e`.
+1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:<ephemeral-port>`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration (`ANTHROPIC_BASE_URL` when set, otherwise the Anthropic default), stream the response back (including SSE).
+2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=<placeholder>` and `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>` (or `http://<host-gateway>:<port>`). Never pass the real key in `-e`. Use one **fixed placeholder string** (for example `__JAIPH_ANTHROPIC_KEY_PLACEHOLDER__`) defined in one place and shared by the proxy and Docker env wiring so tests stay deterministic.
 3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container.
-4. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there.
-5. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred.
-6. **Non-goals (v1)** — Rate limits and audit logging.
-7. **Lifecycle** — Start the proxy before the first `spawnDockerProcess` for that Jaiph process; stop it when tearing down the Docker run (and on Jaiph exit), with reference counting if multiple Docker runs can occur in one process.
+4. **Activation** — Start the proxy when Docker mode is active **and** the host has a real `ANTHROPIC_API_KEY` to protect (if unset, no proxy). Non-Docker runs unchanged.
+5. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there.
+6. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred.
+7. **Non-goals (v1)** — Rate limits and audit logging.
+8. **Lifecycle** — Start the proxy immediately before `spawnDockerProcess` when activation applies; stop it in `cleanupDocker` (or paired helper) when the Docker run tears down. The only current call site is `src/cli/commands/run.ts` (one Docker run per CLI process); structure so multiple spawns could refcount later if needed.
 
 **Context**
 
 * Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/).
-* **Implementation touchpoints** — `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags), `src/cli/commands/run.ts` (spawn/cleanup lifecycle). Agent CLI args/env preparation: `src/runtime/kernel/prompt.ts` (likely unchanged).
+* **Implementation touchpoints** — New small module for the HTTP proxy; `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags); `src/cli/commands/run.ts` if wiring cannot live entirely in `docker.ts`. `src/runtime/kernel/prompt.ts` likely unchanged. Expect `src/runtime/docker.test.ts` updates for new `-e` behavior.
 * Image template: `.jaiph/Dockerfile`.
 
 **Queue coordination**
 
-* This edits the same `docker.ts` / Docker spawn path as the queued **Docker — strict image contract + GHCR** task—land together or immediately after to reduce merge churn.
+* Coordinate merges with other Docker/runtime work in this queue (for example **Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env**) to limit churn on `docker.ts` / `run.ts`.
 * Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands.
 
 **Acceptance criteria**
 
-* Host-side proxy starts automatically when Docker mode is active (Anthropic/Claude path).
+* Host-side proxy starts automatically under the activation rule above.
 * Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment.
 * `claude` CLI calls from inside Docker succeed via the proxy.
 * Proxy handles streaming responses (SSE) correctly.
 * Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder).
-* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address).
+* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). Update `docs/sandboxing.md` so the credential-forwarding section matches shipped behavior (replacing the prior “when the credential-proxy feature lands” wording).
 
 **Scope note**
 
-* Target **\~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers.
+* Target **~3 production files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`; tests updated alongside. Plain functions, no new abstraction layers.
 
 ## `jaiph serve` — expose workflows as an MCP server #dev-ready
 
diff --git a/docs/architecture.md b/docs/architecture.md
index 936e3022..b96d1bb9 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -57,6 +57,7 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`*
 
 - **Docker runtime helper (`src/runtime/docker.ts`)**
   - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The default image is the official `ghcr.io/jaiphlang/jaiph-runtime` GHCR image; every selected image must already contain `jaiph` (no auto-install or derived-image build at runtime). The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits.
+  - **Workspace immutability:** Docker runs cannot modify the host workspace. The host checkout is mounted read-only; `/jaiph/workspace` is a sandbox-local copy-on-write overlay discarded on exit. The only host-writable path is `/jaiph/run` (run artifacts). During teardown, `exportWorkspacePatch()` emits a `workspace.patch` file (best-effort `git diff --binary`) into the run directory so sandbox edits can be reviewed or applied on the host. See [Sandboxing](sandboxing.md) for the full contract.
 
 ## Runtime vs CLI responsibilities
 
diff --git a/docs/artifacts.md b/docs/artifacts.md
index be79f8ed..62abad47 100644
--- a/docs/artifacts.md
+++ b/docs/artifacts.md
@@ -22,6 +22,7 @@ The runtime uses a UTC-dated hierarchy. Each run gets its own folder: date, then
       inbox/                           # inbox message files (when channels are used)
       .seq                             # step-sequence counter (kernel/seq-alloc.ts)
       run_summary.jsonl                # durable event timeline
+      workspace.patch                  # (Docker only) git diff of sandbox workspace changes
 ```
 
 Sequence prefixes are **monotonic and unique** per run (allocated in the kernel), so artifact names sort in execution order. For how this fits into the CLI and kernel, see [Architecture — Durable artifact layout](architecture.md#durable-artifact-layout).
@@ -32,6 +33,7 @@ Sequence prefixes are **monotonic and unique** per run (allocated in the kernel)
 - **`run_summary.jsonl`** — Append-only JSONL timeline: workflow boundaries, step start/end, structured log lines, inbox-related events. Useful for tooling and post-run analysis.
 - **`inbox/`** — When you use channels, message payloads can be reflected as files under the run for inspection (see [Inbox & Dispatch](inbox.md)).
 - **`.seq`** — Internal counter backing the numeric prefixes; you normally do not edit it.
+- **`workspace.patch`** — (Docker runs only) A `git diff --binary` patch capturing all workspace modifications made during the run. Generated automatically during runtime teardown when Docker sandboxing is enabled and the workspace has changes. The patch is sufficient to review or `git apply` on the host. Omitted when there are no workspace changes. See [Sandboxing — Workspace patch export](sandboxing.md#runtime-behavior).
 
 ## Keeping runs out of git
 
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 6445008e..620e3896 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -143,6 +143,10 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 **Run artifacts** -- The host CLI mounts the resolved host runs root at `/jaiph/run:rw` inside the container. By default this is `.jaiph/runs` under the workspace; a relative `JAIPH_RUNS_DIR` is resolved under the workspace; an absolute `JAIPH_RUNS_DIR` must stay within the workspace or the run fails with `E_DOCKER_RUNS_DIR`. `JAIPH_RUNS_DIR` is set to `/jaiph/run` inside the container, so the runtime writes artifacts directly into the requested host path.
 
+**Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. The host checkout is bind-mounted read-only; the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer (fuse-overlayfs or copy fallback) whose state is discarded on container exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract.
+
+**Workspace patch export** -- When a Docker-backed run modifies files under `/jaiph/workspace`, the runtime automatically exports a `workspace.patch` file into the run directory during teardown (`exportWorkspacePatch` in `docker.ts`, called from `NodeWorkflowRuntime`). The patch is generated with `git diff --binary` (after `git add -N .` for untracked files) and is sufficient to review or `git apply` on the host. Patch export is best-effort: it runs regardless of workflow exit status, and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, the `workspace.patch` file is omitted (not created). The bundled `.jaiph/Dockerfile` image includes `git`.
+
 **Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment.
 
 **Timeout** -- When `runtime.docker_timeout` is greater than zero, the CLI sends `SIGTERM` to the container process on overrun, followed by `SIGKILL` after a 5-second grace period. The failure message includes `E_TIMEOUT container execution exceeded timeout`.
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 473a0803..23dd7c06 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -18,6 +18,7 @@ import {
   isEnvDenied,
   ENV_DENYLIST_PREFIXES,
   GHCR_IMAGE_REPO,
+  exportWorkspacePatch,
   type MountSpec,
   type DockerRunConfig,
   type DockerSpawnOptions,
@@ -696,3 +697,62 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   assert.ok(secOptIdx >= 0, "--security-opt present");
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
+
+// ---------------------------------------------------------------------------
+// exportWorkspacePatch
+// ---------------------------------------------------------------------------
+
+test("exportWorkspacePatch writes patch when git repo has changes", () => {
+  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
+  const patchOut = join(dir, "workspace.patch");
+  try {
+    const { execSync } = require("node:child_process");
+    execSync("git init", { cwd: dir, stdio: "ignore" });
+    execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" });
+    execSync("git config user.name test", { cwd: dir, stdio: "ignore" });
+    // Create initial commit so diff has a baseline
+    writeFileSync(join(dir, "initial.txt"), "initial\n");
+    execSync("git add . && git commit -m init", { cwd: dir, stdio: "ignore" });
+    // Make a change
+    writeFileSync(join(dir, "new-file.txt"), "hello\n");
+
+    const result = exportWorkspacePatch(dir, patchOut);
+    assert.equal(result, true, "should return true when patch is non-empty");
+    assert.ok(existsSync(patchOut), "patch file should exist");
+    const content = readFileSync(patchOut, "utf8");
+    assert.ok(content.includes("new-file.txt"), "patch should reference the new file");
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("exportWorkspacePatch returns false and omits file when no changes", () => {
+  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
+  const patchOut = join(dir, "workspace.patch");
+  try {
+    const { execSync } = require("node:child_process");
+    execSync("git init", { cwd: dir, stdio: "ignore" });
+    execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" });
+    execSync("git config user.name test", { cwd: dir, stdio: "ignore" });
+    writeFileSync(join(dir, "initial.txt"), "initial\n");
+    execSync("git add . && git commit -m init", { cwd: dir, stdio: "ignore" });
+
+    const result = exportWorkspacePatch(dir, patchOut);
+    assert.equal(result, false, "should return false when no changes");
+    assert.ok(!existsSync(patchOut), "patch file should not exist");
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test("exportWorkspacePatch returns false for non-git directory", () => {
+  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
+  const patchOut = join(dir, "workspace.patch");
+  try {
+    const result = exportWorkspacePatch(dir, patchOut);
+    assert.equal(result, false, "should return false for non-git dir");
+    assert.ok(!existsSync(patchOut), "patch file should not exist");
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 32cce1c5..f0b7606d 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -624,6 +624,44 @@ export function cleanupDocker(result: DockerSpawnResult): void {
   }
 }
 
+// ---------------------------------------------------------------------------
+// Workspace patch export (Docker teardown)
+// ---------------------------------------------------------------------------
+
+/**
+ * Export a git diff of workspace changes to a patch file.
+ * Used during Docker run teardown to capture sandbox-local modifications.
+ *
+ * Contract:
+ * - When there are changes, writes `workspace.patch` (git apply-able).
+ * - When there are no changes, the file is omitted (not created).
+ * - Best-effort: failures are reported on stderr but do not affect workflow exit status.
+ *
+ * @returns true if a non-empty patch was written.
+ */
+export function exportWorkspacePatch(workspaceDir: string, outputPath: string): boolean {
+  try {
+    // Stage intent-to-add for untracked files so they appear in git diff
+    execSync("git add -N .", { cwd: workspaceDir, stdio: "ignore", timeout: 30_000 });
+  } catch {
+    // Not a git repo or no new files — continue to diff
+  }
+  try {
+    const diff = execSync("git diff --binary", {
+      cwd: workspaceDir,
+      timeout: 60_000,
+      maxBuffer: 50 * 1024 * 1024,
+    });
+    if (!diff || diff.length === 0) return false;
+    writeFileSync(outputPath, diff);
+    return true;
+  } catch (err: unknown) {
+    const msg = err instanceof Error ? err.message : String(err);
+    process.stderr.write(`jaiph docker: workspace patch export failed: ${msg}\n`);
+    return false;
+  }
+}
+
 export function findRunArtifacts(
   sandboxRunDir: string,
 ): { runDir?: string; summaryFile?: string } {
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index 9678840c..2e02a87d 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -17,6 +17,7 @@ import {
   plainMultilineOrchestrationForRuntime,
   tripleQuotedRawForRuntime,
 } from "../orchestration-text";
+import { CONTAINER_WORKSPACE, exportWorkspacePatch } from "../docker";
 
 const MAX_EMBED = 1024 * 1024;
 const MAX_RECURSION_DEPTH = 256;
@@ -388,6 +389,13 @@ export class NodeWorkflowRuntime {
     }
   }
 
+  /** Best-effort: export workspace changes as a patch file for Docker runs. */
+  private exportPatchIfDocker(): void {
+    const ws = this.env.JAIPH_WORKSPACE;
+    if (ws !== CONTAINER_WORKSPACE) return;
+    exportWorkspacePatch(ws, join(this.runDir, "workspace.patch"));
+  }
+
   async runDefault(args: string[]): Promise<number> {
     this.emitWorkflow("WORKFLOW_START", "default");
     const rootScope: Scope = {
@@ -411,6 +419,7 @@ export class NodeWorkflowRuntime {
     });
     const result = await this.executeWorkflow(resolved.filePath, resolved.workflow.name, rootScope, args, false);
     this.emitWorkflow("WORKFLOW_END", "default");
+    this.exportPatchIfDocker();
     this.stopHeartbeat();
     return result.status;
   }

From 348e3e4da0c56d2a2b93b68c1b791ad82d813aa3 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Sun, 19 Apr 2026 14:01:08 +0200
Subject: [PATCH 10/38] Remove target design documentation file to streamline
 project focus and eliminate outdated content.

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 QUEUE.md | 216 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 155 insertions(+), 61 deletions(-)

diff --git a/QUEUE.md b/QUEUE.md
index 231e8c35..678b851d 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -9,99 +9,193 @@ Process rules:
 5. This queue assumes **hard rewrite semantics**:
    * breaking changes are allowed,
    * backward compatibility is **not** a design goal unless a task explicitly says otherwise.
+6. **Acceptance criteria are non-negotiable.** A task is not done until every acceptance bullet is verified by a test that fails when the contract is violated. "It works on my machine" or "the existing tests pass" is not acceptance.
 
 ***
 
-## Runtime — credential proxy for Docker mode #dev-ready
+## Language/Runtime — add `recover` loop semantics for non-isolated `run` #dev-ready
 
 **Goal**
-Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. The workload in the container never receives the real secret.
+Add `recover` as a first-class repair-and-retry primitive distinct from `catch`. Ship for non-isolated, non-async `run` first. Async composition lands in the next task, not here.
 
-**Design**
+**Scope**
 
-1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:<ephemeral-port>`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration (`ANTHROPIC_BASE_URL` when set, otherwise the Anthropic default), stream the response back (including SSE).
-2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=<placeholder>` and `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>` (or `http://<host-gateway>:<port>`). Never pass the real key in `-e`. Use one **fixed placeholder string** (for example `__JAIPH_ANTHROPIC_KEY_PLACEHOLDER__`) defined in one place and shared by the proxy and Docker env wiring so tests stay deterministic.
-3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container.
-4. **Activation** — Start the proxy when Docker mode is active **and** the host has a real `ANTHROPIC_API_KEY` to protect (if unset, no proxy). Non-Docker runs unchanged.
-5. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there.
-6. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred.
-7. **Non-goals (v1)** — Rate limits and audit logging.
-8. **Lifecycle** — Start the proxy immediately before `spawnDockerProcess` when activation applies; stop it in `cleanupDocker` (or paired helper) when the Docker run tears down. The only current call site is `src/cli/commands/run.ts` (one Docker run per CLI process); structure so multiple spawns could refcount later if needed.
+* Keep existing `catch` behavior as one-attempt try/catch.
+* Add:
 
-**Context**
+  ```jh
+  run sth() recover(err) {
+    ...
+  }
+  ```
 
-* Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/).
-* **Implementation touchpoints** — New small module for the HTTP proxy; `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags); `src/cli/commands/run.ts` if wiring cannot live entirely in `docker.ts`. `src/runtime/kernel/prompt.ts` likely unchanged. Expect `src/runtime/docker.test.ts` updates for new `-e` behavior.
-* Image template: `.jaiph/Dockerfile`.
+  with loop semantics: try, bind failure, run repair block, retry, stop on success or retry-limit exhaustion.
+* Add a small explicit retry limit (default 10) with config override.
+* Keep the runtime behavior simple and observable; do not introduce speculative control-flow abstractions.
 
-**Queue coordination**
+**Required tests**
 
-* Coordinate merges with other Docker/runtime work in this queue (for example **Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env**) to limit churn on `docker.ts` / `run.ts`.
-* Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands.
+* Parser / formatter / validation coverage for `recover`.
+* Runtime tests for:
+  - success on first attempt
+  - one or more repair loops before success
+  - retry limit exhaustion
+  - retry limit configured via `config`
+* At least one acceptance test using `recover` to repair and retry a failing run.
 
 **Acceptance criteria**
 
-* Host-side proxy starts automatically under the activation rule above.
-* Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment.
-* `claude` CLI calls from inside Docker succeed via the proxy.
-* Proxy handles streaming responses (SSE) correctly.
-* Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder).
-* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). Update `docs/sandboxing.md` so the credential-forwarding section matches shipped behavior (replacing the prior “when the credential-proxy feature lands” wording).
-
-**Scope note**
+* `recover` is distinct from `catch`.
+* The retry limit is explicit and configurable.
+* Tests prove loop behavior and limit handling.
+* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`, the `STATEMENT_KEYWORDS` set and any keyword-flow special cases) recognizes `recover` as a keyword. Any `.jh` code block on the docs site that uses `recover` renders with the keyword colored.
 
-* Target **~3 production files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`; tests updated alongside. Plain functions, no new abstraction layers.
+***
 
-## `jaiph serve` — expose workflows as an MCP server #dev-ready
+## Runtime — spec and implement `Handle<T>` for `run async`, including `recover` composition #dev-ready
 
 **Goal**
-Add a `jaiph serve <file.jh>` command that starts a stdio MCP server. Each top-level workflow in the file becomes a callable MCP tool. This lets any MCP client (Cursor, Claude Desktop, custom agents) invoke Jaiph workflows directly.
+Replace the current implicit end-of-workflow join with a value-based handle model. `run async foo()` returns a `Handle<T>` immediately. The handle resolves on first non-passthrough read. Workflow exit implicitly joins remaining unresolved handles. Ship `recover` composition for `run async` in the same task.
 
-**Context**
+This task ships **both the written spec and the runtime implementation in one go.** The previous attempt split them across two tasks and the spec drifted from the implementation. Keep them together so the contract and the code land in the same review.
 
-* MCP (Model Context Protocol) uses JSON-RPC 2.0 over stdio. A server must handle `initialize`, `tools/list`, and `tools/call`.
-* Jaiph already has a runtime (`src/runtime/kernel/node-workflow-runtime.ts`) that can execute workflows and capture output.
-* The `@modelcontextprotocol/sdk` npm package provides a Node.js server implementation, but the protocol is simple enough to implement directly (\~200 lines for stdio JSON-RPC + the three methods).
+**Scope**
 
-**Phase 1 — single text input (this task)**
+* Write the spec section in `docs/spec-async-handles.md` (a new file) covering:
+  - `Handle<T>` value model: a handle resolves to whatever the called function returned. First non-passthrough read forces resolution. Passthrough (assignment, storage, passing through arguments and returns unchanged) does not.
+  - Workflow exit implicitly joins any remaining unresolved handles; this is not an error.
+  - No fire-and-forget mode.
+  - `recover` composition: `b1 = run async foo() recover(err) { ... }` — handle resolves to either the eventual success value (after the retry loop runs) or the final failure. Same retry-limit semantics as the non-async `recover` task.
+* Replace the implicit end-of-workflow join in `src/runtime/kernel/node-workflow-runtime.ts` with the value-based handle model.
+* `run async ...` returns a `Handle<T>` value. `T` is the same return type the function would have under a non-async `run`.
+* Reads that force resolution: passing as an argument to `run`, string interpolation, comparison, conditional branching, any other access to the underlying value.
+* Passthrough (assignment, storing in a list, passing through `workflow` arguments and returns unchanged) does not force resolution.
+* Workflow exit implicitly joins unresolved handles. This preserves today's end-of-workflow behavior at the boundary.
+* Parser must accept `recover(err) { ... }` after `run async ref(args)`. The previous attempt had the parser silently reject this with a "trailing content" error — that is the failure mode to fix.
+* Preserve async progress/event visibility unless the contract forces an intentional change.
+* Update docs that still describe the old statement-based async model.
+
+**Required tests**
+
+* Parser / formatter / validation coverage for `run async ref(args) recover(err) { ... }`.
+* Runtime tests for handle creation, transparent resolution at first read, and resolution forced by passing a handle into another `run`.
+* Runtime test for the multi-handle join shape: multiple async handles passed into another call all resolve before the callee runs.
+* Runtime test that workflow exit joins unresolved handles without raising an error.
+* Runtime test that handles can be stored in a list and resolved when read.
+* Runtime test for `run async foo() recover(err) { ... }`: handle resolves to the success value after at least one repair loop.
+* Runtime test that the retry-limit semantics are shared with the non-async `recover` task.
 
-Each workflow becomes a tool with a single `input` string parameter:
+**Acceptance criteria**
+
+* `run async ...` returns a first-class handle value.
+* Handle reads force resolution per the spec.
+* Workflow exit implicitly joins remaining handles (no error).
+* `recover` works on `run async ref()`. The parser accepts the form; the runtime implements the spec contract.
+* Spec and implementation ship in the same change set; the spec is internally consistent and self-contained.
+* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword (modifier on `run`) and continues to highlight `recover` correctly when it appears as `recover(err) { ... }` after `run async ref(args)`. A docs code block with `b1 = run async foo() recover(err) { ... }` renders with `run`, `async`, and `recover` all colored.
+
+***
 
-```json
-{
-  "name": "analyze_gaps",
-  "description": "workflow analyze_gaps from qa.jh",
-  "inputSchema": {
-    "type": "object",
-    "properties": {
-      "input": { "type": "string", "description": "Text input passed to the workflow" }
-    }
+## Artifacts — runtime mount + `artifacts.jh` lib for publishing files out of the sandbox #dev-ready
+
+**Goal**
+Give workflows a clean, versatile way to publish files from inside the whole-program Docker sandbox to a host-readable location. Split the work across two layers:
+
+* **Runtime layer** (in `src/runtime/`): expose a writable artifacts directory inside the sandbox at a stable path, mapped to `.jaiph/runs/<run_id>/artifacts/` on the host. No new language primitive; the runtime's only job is to mount and to surface the path via env var.
+* **Library layer** (in `.jaiph/libs/jaiphlang/`): ship a new `artifacts.jh` lib (mirroring the existing `queue.jh` / `queue.py` pair) with `export workflow` entries for the common operations. Userspace imports the lib explicitly:
+
+  ```jh
+  import "jaiphlang/artifacts.jh" as artifacts
+
+  workflow default() {
+    run artifacts.save("./build/output.bin", "build-output.bin")
+    run artifacts.save_patch("snapshot.patch")
   }
-}
-```
+  ```
+
+This keeps the runtime minimal (just a mount), makes the surface library-shaped (so it's discoverable and replaceable), and matches the established `queue.jh` pattern.
+
+**Context (read before starting)**
+
+* Today's whole-program Docker sandbox in `src/runtime/docker.ts` already mounts the run directory writable at `/jaiph/run`. Artifacts will live in a subdirectory of that mount; no new mount is needed.
+* The existing lib pattern is `.jaiph/libs/jaiphlang/queue.jh` paired with `.jaiph/libs/jaiphlang/queue.py` (a small Python helper invoked via `import script ... as queue`). Follow that pattern.
+* The `isolated` keyword is not part of this codebase. This task is about the whole-program Docker sandbox only; no per-call isolation primitive exists or is to be introduced.
 
-The `input` value is injected into the workflow environment as `JAIPH_MCP_INPUT` (accessible via `${input}` interpolation or `$JAIPH_MCP_INPUT` in scripts). The tool response is the workflow's captured output (log messages + prompt results).
+**Scope**
+
+**Runtime layer:**
+
+* Ensure `.jaiph/runs/<run_id>/artifacts/` exists on the host before the sandbox starts (`mkdirSync` with `recursive: true`).
+* The existing `/jaiph/run` mount in the container already exposes the artifacts subdirectory implicitly. Verify it does, and that writes inside the container land at `.jaiph/runs/<run_id>/artifacts/` on the host.
+* Surface the in-container artifacts path to userspace via an env var. Suggested name: `JAIPH_ARTIFACTS_DIR` (defaulting to `/jaiph/run/artifacts` in the container, `<host_run_dir>/artifacts` on the host when running without the sandbox). The library reads this env var rather than hardcoding the path.
+* When running on the host (no sandbox), `JAIPH_ARTIFACTS_DIR` points at the host artifacts directory directly so the same lib works.
+
+**Library layer:**
+
+* Add `.jaiph/libs/jaiphlang/artifacts.jh` and `.jaiph/libs/jaiphlang/artifacts.py` (or `.sh` if it stays a one-liner). Mirror the `queue.jh` / `queue.py` shape exactly — no novel patterns.
+* Provide these `export workflow` entries:
+  - `save(local_path, name)` — copies the file at `local_path` into `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path as a string.
+  - `save_patch(name)` — runs `git diff` (working tree vs HEAD) inside the sandbox workspace, writes it to `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path.
+  - `apply_patch(path)` — applies a patch file to the current workspace via `git apply`. Useful for replaying artifacts across runs.
+* The lib must work both inside the sandbox and on the host (when the user runs `jaiph` without the Docker sandbox). The only difference is what `JAIPH_ARTIFACTS_DIR` resolves to.
+* Document that `save_patch` excludes `.jaiph/` from the produced patch (the runtime writes its own state under `.jaiph/`; including it in a patch would clobber state on apply). The exclusion lives in the lib's helper script, not in the runtime, and is documented inline next to the implementation.
 
-**Phase 2 — typed parameters (future task)**
+**Required tests**
 
-Extend the language with workflow parameters: `workflow analyze(file: string, depth: number) { ... }`. These map directly to the tool's `inputSchema`. Not in scope for this task.
+* **Runtime tests**:
+  - `JAIPH_ARTIFACTS_DIR` is set inside the sandbox and points at a writable directory.
+  - `JAIPH_ARTIFACTS_DIR` is set when running on the host (no sandbox) and points at `.jaiph/runs/<run_id>/artifacts/`.
+  - The artifacts directory exists before the sandbox starts (no race where the lib tries to write before the dir exists).
+* **Library tests**:
+  - `artifacts.save(local_path, name)`: file is created at the host path; return value matches that path; file content equals the source.
+  - `artifacts.save_patch(name)`: produces a non-empty patch when the workspace has uncommitted changes; produces an empty (or absent) patch when the workspace is clean; the patch does not reference `.jaiph/` even when `.jaiph/` files have changed.
+  - `artifacts.apply_patch(path)`: applies a previously-saved patch cleanly; fails with a clear error when the patch does not apply.
+* **End-to-end**:
+  - One `.jh` example workflow that imports `jaiphlang/artifacts.jh`, calls `artifacts.save` and `artifacts.save_patch`, runs under the sandbox, and the test asserts both files appear on the host at the expected paths.
+
+**Acceptance criteria**
+
+* `.jaiph/runs/<run_id>/artifacts/` exists, is writable from inside the sandbox, and survives sandbox teardown (it's on the host filesystem via the existing mount).
+* `JAIPH_ARTIFACTS_DIR` is exposed in both sandbox and host execution; the lib reads it rather than hardcoding paths.
+* `.jaiph/libs/jaiphlang/artifacts.jh` ships with `save`, `save_patch`, `apply_patch` as `export workflow` entries, mirroring the `queue.jh` lib shape.
+* The lib works identically inside the sandbox and on the host.
+* `save_patch`'s `.jaiph/` exclusion is documented inline in the helper script.
+* No new runtime language primitive is introduced. The user-facing surface is `import` + workflow calls.
+* The docs-site documentation is updated to describe the artifacts lib alongside the queue lib (`docs/libraries.md` or equivalent).
+
+***
+
+## Runtime — PTY-based TTY test for `run async` #dev-ready
+
+**Goal**
+Live progress for `run async` (with handles, deferred resolution, multi-branch fan-out without isolation) takes a different render path than synchronous steps. Close the regression-coverage gap by exercising that path through a real PTY.
+
+**Context (read before starting)**
+
+`e2e/tests/81_tty_progress_tree.sh` already uses Python's `pty.openpty()` to drive `jaiph run` under a real TTY and asserts on the rendered progress frames. It covers non-async workflows. There is no equivalent for `run async`. The host progress renderer takes a different path for async (handles, deferred resolution, multiple in-flight calls competing for the live frame), and that path has been broken before without any test catching it.
 
 **Scope**
 
-1. **CLI command** (`src/cli/commands/serve.ts`): add `jaiph serve <file.jh>` that parses the file, starts a stdio JSON-RPC server, and handles `initialize`, `tools/list`, `tools/call`.
-2. **Tool listing**: read the parsed module's `workflows` array. Each workflow becomes a tool entry with `name` \= workflow name, `description` \= `"workflow <name> from <filename>"`, `inputSchema` \= single `input` string.
-3. **Tool execution**: on `tools/call`, run the named workflow using the existing runtime. Capture all output (logs, prompt results). Return as `content: [{ type: "text", text: output }]`.
-4. **Error handling**: if the workflow fails, return `isError: true` with the error message.
-5. **Config inheritance**: the `.jh` file's `config { ... }` block applies normally (backend, model, etc.).
-6. **E2E test**: a test that starts `jaiph serve` with a simple workflow, sends JSON-RPC messages via stdin, and verifies the tool list and a tool call response.
-7. **Docs**: add a section to `docs/index.html` and `docs/jaiph-skill.md` about MCP server mode.
+* Add an e2e test (sibling of `e2e/tests/81_tty_progress_tree.sh`) that:
+  * spawns `jaiph run` under a real PTY,
+  * exercises a workflow that uses `run async branch()` with at least two concurrent async calls,
+  * each branch emits multiple progress events over time (use a deterministic step like a sleep loop with `print` calls — do not depend on `prompt claude` or any other non-deterministic step),
+  * captures the PTY output and asserts:
+    1. each branch's progress events appear under that branch's node in the tree as they happen,
+    2. the final frame shows both branches as completed with their resolved return values,
+    3. no ANSI corruption (orphaned escape sequences, stray cursor moves outside the rendered region).
+* The test must fail today against any regression that batches async progress events at branch completion, drops them, or scrambles the frame.
+
+**Non-goals**
+
+* Do not test `prompt claude` or any non-deterministic step. Branches must emit synthetic, time-spaced events.
+* Do not assert on exact frame timing; assert on order and presence within a generous timeout.
+* No `isolated` variant — that keyword is not part of this codebase.
 
 **Acceptance criteria**
 
-* `jaiph serve examples/greeting.jh` starts a stdio MCP server.
-* `tools/list` returns one tool per workflow.
-* `tools/call` executes the workflow and returns its output.
-* Errors produce `isError: true` responses (no server crash).
-* E2E test passes.
+* New test lives next to `e2e/tests/81_tty_progress_tree.sh` and follows the same shell-driving-Python-PTY pattern.
+* The test passes on a green build and fails when the live-progress path for `run async` regresses.
+* Test runs as part of the standard e2e suite (no separate invocation).
 
 ***

From 58bbcca54e95974f48a3d8f393f7a3bc6e3e7ef7 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 13:57:20 +0200
Subject: [PATCH 11/38] Queue: Add cleanup tasks

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 QUEUE.md | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)

diff --git a/QUEUE.md b/QUEUE.md
index 678b851d..68afd335 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -199,3 +199,204 @@ Live progress for `run async` (with handles, deferred resolution, multi-branch f
 * Test runs as part of the standard e2e suite (no separate invocation).
 
 ***
+
+## Cleanup — delete top-level debug cruft and harden `.gitignore` #dev-ready
+
+**Goal**
+The repo root contains 22+ leftover debug directories from an abandoned per-call isolated experiment (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`), plus stale `.tmp`, `.tmp-build`, `.tmp-debug`, `.tmp_run_debug`, `QUEUE.md.tmp.4951`, `safe_name`, top-level `lib/`, top-level `run/`. None are in `.gitignore`. Fix that, in one pass, so the workspace is readable at a glance and these don't return.
+
+**Scope**
+
+* Delete every leftover debug directory at the repo root matching `docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`. Verify with `git ls-files <pattern>` first that they are not tracked (they should not be).
+* Investigate three suspicious top-level paths: `safe_name`, `lib/`, `run/`. The default disposition is **delete**. Only keep one if you can identify a live consumer in the source tree (search with `rg`/`grep` for the path string). If a consumer exists, document it inline next to the deletion decision.
+* Delete tracked cruft files: `safe_name` and `QUEUE.md.tmp.4951`. Verify they are tracked first (`git ls-files`); use `git rm` rather than `rm` for tracked paths.
+* Add patterns to `.gitignore` so they cannot return without a deliberate override:
+  - `docker-*/`
+  - `nested-*/`
+  - `overlay-*/`
+  - `local-*/`
+  - `.tmp*/`
+  - `QUEUE.md.tmp.*`
+* Sanity-check: after the cleanup, `ls` at the repo root should show only documented project directories. No `.cidfile`, no `.pid`, no random temp dir names.
+
+**Non-goals**
+
+* Do not touch `.jaiph/runs/`, `dist/`, `node_modules/` — already in `.gitignore` and load-bearing.
+* Do not delete the `docker/` directory (singular, no suffix) — that is a different, intentional location.
+* No code changes; this task is filesystem hygiene only.
+
+**Acceptance criteria**
+
+* Repo root listing contains zero `docker-*`, `nested-*`, `overlay-*`, `local-*`, or `.tmp*` directories after the change.
+* `.gitignore` contains the patterns listed above; `git status` is clean immediately after deletion.
+* Disposition of `safe_name`, `lib/`, `run/` is recorded in the commit message (deleted, kept-and-why).
+* A second `npm run build && npm test` after the cleanup passes (proves nothing important was removed).
+
+***
+
+## Cleanup — remove dead per-call-isolated leftovers from `src/runtime/docker.ts` #dev-ready
+
+**Goal**
+`src/runtime/docker.ts` (688 LoC) still exports four functions written exclusively for the now-abandoned per-call `isolated` keyword: `exportWorkspacePatch`, `findRunArtifacts`, plus the helper `exportPatchIfDocker` in `src/runtime/kernel/node-workflow-runtime.ts`. These have one or two live callers each, all of which are themselves transitional code from the same abandoned design. Once the new `artifacts.jh` lib has landed (it replaces the use case end-to-end), these can go. Net reduction: ~200 LoC of source + ~150 LoC of dead tests in `src/runtime/docker.test.ts`.
+
+**Context (read before starting)**
+
+* `exportWorkspacePatch(workspaceDir, outputPath)` writes a `git diff` patch when running inside the Docker sandbox. Single live caller: `NodeWorkflowRuntime.exportPatchIfDocker()` (in `src/runtime/kernel/node-workflow-runtime.ts`), which writes `<runDir>/workspace.patch` at workflow end. The new `artifacts.save_patch()` workflow in `.jaiph/libs/jaiphlang/artifacts.jh` (shipped by the artifacts task) replaces this use case explicitly: callers who want a patch ask for one by name, with the path returned to them.
+* `findRunArtifacts(sandboxRunDir)` discovers the latest run dir under a Docker-mounted artifacts area. Single live caller: `src/cli/commands/run.ts:367` — the host reads it after the sandbox exits to surface the inner run's artifacts. With the artifacts task's explicit `JAIPH_ARTIFACTS_DIR` mount and known path, this discovery is no longer needed: the host already knows where to look.
+* The `isolated` keyword is not part of this codebase. There is no per-call isolation primitive to keep these helpers alive for.
+
+**Scope**
+
+* **Precondition check**: before deleting, run `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` and verify the only callers are the ones listed above. If any new caller has appeared, evaluate it on the spot — either it is also dead and can go in this task, or removal is blocked and you stop and report.
+* **Precondition check**: confirm the artifacts task has shipped (look for `.jaiph/libs/jaiphlang/artifacts.jh` and a working `artifacts.save_patch`). If it has not, this task is not ready — do not attempt half-removal that breaks the runtime.
+* Remove from `src/runtime/docker.ts`:
+  - `exportWorkspacePatch` (function + export)
+  - `findRunArtifacts` (function + export)
+* Remove from `src/runtime/kernel/node-workflow-runtime.ts`:
+  - `exportPatchIfDocker` (private method)
+  - The import of `exportWorkspacePatch` from `../docker`
+  - Any call site of `exportPatchIfDocker` (verify zero remain after the method is gone)
+* Remove from `src/cli/commands/run.ts`:
+  - The `findRunArtifacts(sandboxRunDir)` call at line ~367
+  - The import of `findRunArtifacts`
+  - Any code that consumes the result of `findRunArtifacts` and is now dead (chase the value, do not leave dangling variables)
+* Remove from `src/runtime/docker.test.ts`:
+  - All `findRunArtifacts: ...` test cases
+  - All `exportWorkspacePatch: ...` test cases
+  - The shared test fixtures used only by those tests
+
+**Non-goals**
+
+* Do not touch `writeOverlayScript`, `overlayMountPath`, `buildDockerArgs`, or other docker.ts functions — those remain load-bearing for the whole-program Docker sandbox.
+* Do not modify the artifacts lib or its runtime mount; this task only removes the predecessor primitives.
+* Do not collapse env vars or config keys — that is a separate concern explicitly out of scope.
+
+**Acceptance criteria**
+
+* `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` returns zero matches.
+* `npm run build` succeeds with no TypeScript errors after removal.
+* `npm test` passes (proves no remaining test depends on the deleted primitives).
+* Net diff: ~200 LoC removed from `src/runtime/docker.ts` and `src/runtime/kernel/node-workflow-runtime.ts`, ~150 LoC of dead tests removed from `src/runtime/docker.test.ts`. If your diff is materially smaller, you missed something; if materially larger, you are deleting more than the task scope — stop and reassess.
+
+***
+
+## Cleanup — consolidate the 5-way test directory split #dev-ready
+
+**Goal**
+Today there are five different places that contain "tests": `src/**/*.test.ts` (66 unit tests, adjacent to source), `test/` (4 integration files including a 2427-LoC `sample-build.test.ts`), `tests/e2e-samples/` (a single Playwright file), `compiler-tests/` (txtar fixtures), `golden-ast/` (fixtures + expected). Plus runners `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` mixed into the production source tree. A new contributor cannot tell where a new test belongs without reading the whole layout. Fix the structure in one pass.
+
+**Context (read before starting)**
+
+* The current `package.json` `test` script enumerates the test sources explicitly; this gives us a precise inventory of what is wired in:
+  ```
+  dist/test/*.test.js
+  dist/src/**/*.test.js
+  dist/src/**/*.acceptance.test.js
+  dist/src/compiler-test-runner.js
+  dist/src/golden-ast-runner.js
+  ```
+  Any move must update this script and keep the same test set running. Adding tests is out of scope; this is purely reorganization.
+* `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` are compiled and shipped in `dist/`, but they are test infrastructure (they consume fixtures, produce assertions). They should not live in `src/`.
+* `compiler-tests/README.md` already documents the txtar format — preserve that doc next to the fixtures it describes.
+
+**Scope**
+
+* **Move test infrastructure out of `src/`**:
+  - `src/compiler-test-runner.ts` → `test-infra/compiler-test-runner.ts`
+  - `src/golden-ast-runner.ts` → `test-infra/golden-ast-runner.ts`
+  - `tsconfig.json` and `package.json` `test` script updated to reference the new locations.
+* **Rename and group fixture directories**:
+  - `compiler-tests/` → `test-fixtures/compiler-txtar/` (preserves the README inside).
+  - `golden-ast/` → `test-fixtures/golden-ast/` (preserves the `fixtures/` and `expected/` subdirs underneath).
+  - Update path references in `test-infra/compiler-test-runner.ts` and `test-infra/golden-ast-runner.ts`.
+* **Fold the singleton Playwright test**:
+  - `tests/e2e-samples/landing-page.spec.ts` → `e2e/playwright/landing-page.spec.ts`.
+  - Update `playwright.config.ts` and the `test:samples` npm script accordingly.
+  - Delete the now-empty `tests/` directory.
+* **Triage `test/` (4 files, 2960 LoC)**:
+  - `test/run-summary-jsonl.test.ts` (178 LoC), `test/signal-lifecycle.test.ts` (220 LoC), `test/tty-running-timer.test.ts` (135 LoC) — keep in a renamed `integration/` directory. They are integration-flavored, not unit, and don't have an obvious adjacent home.
+  - `test/sample-build.test.ts` (2427 LoC) — split. Read the file, group its tests by which subsystem they actually exercise, and move each group either next to that subsystem (`src/.../<name>.integration.test.ts`) or into `integration/sample-build/<topic>.test.ts`. Aim for no resulting file over ~600 LoC. The split is the work; it is not optional.
+  - Move `test/expected/` and `test/fixtures/` to `test-fixtures/sample-build/` if any test still references them after the split.
+* **Final layout** (target):
+  ```
+  src/**/*.test.ts                       # unit, adjacent (unchanged)
+  src/**/*.acceptance.test.ts            # acceptance, adjacent (unchanged)
+  integration/**/*.test.ts               # integration tests (was `test/`, after split)
+  test-fixtures/compiler-txtar/          # was `compiler-tests/`
+  test-fixtures/golden-ast/              # was `golden-ast/`
+  test-fixtures/sample-build/            # if any sample-build fixtures survive the split
+  test-infra/compiler-test-runner.ts     # was `src/compiler-test-runner.ts`
+  test-infra/golden-ast-runner.ts        # was `src/golden-ast-runner.ts`
+  e2e/                                   # shell + .jh (unchanged)
+  e2e/playwright/landing-page.spec.ts    # was `tests/e2e-samples/`
+  ```
+  Three test "places" instead of five (`src/`-adjacent, `integration/`, `e2e/`); plus two clearly named support directories (`test-fixtures/`, `test-infra/`).
+* Update `package.json` `test`, `test:compiler`, `test:golden-ast`, `test:samples`, `test:acceptance`, `test:ci`, `test:e2e` scripts to reference the new paths. Verify by running `npm test` end-to-end.
+
+**Non-goals**
+
+* Do not change any test's logic, assertions, or fixtures' contents. The goal is layout, not behavior.
+* Do not change the unit-tests-adjacent-to-source convention. That part works.
+* Do not delete any test (other than ones absorbed into the `sample-build.test.ts` split, where the original file goes away after redistribution).
+
+**Acceptance criteria**
+
+* `npm test` passes with the same test count (or higher, if the `sample-build` split surfaces previously-bundled cases as separate tests). Test count must not decrease.
+* No file in `src/` is named `*-test-runner.ts`. Test infrastructure lives only in `test-infra/`.
+* No file under `integration/` exceeds ~600 LoC after the `sample-build` split.
+* The repo root no longer has both `test/` and `tests/`. (`tests/` is deleted after folding.)
+* `package.json` test scripts reference the new paths and the same test set runs in CI.
+* Commit message documents the file-move map (old → new) so reviewers can sanity-check that nothing was lost.
+
+***
+
+## Refactor — split `src/runtime/kernel/node-workflow-runtime.ts` (1720 LoC) #dev-ready
+
+**Goal**
+`src/runtime/kernel/node-workflow-runtime.ts` is a 1720-LoC god file: ~280 LoC of free arg-parsing helpers above the class, then a 1440-LoC `NodeWorkflowRuntime` class with 25 methods spanning workflow orchestration, step execution, prompt step lifecycle, event emission, mock execution, frame stack management, and heartbeat I/O. Reading or modifying any one concern requires holding all of them in head. Split along clean seams so each concern is in a focused module.
+
+**Context (read before starting)**
+
+* This file is actively touched by the `Handle<T>` task. If that task is in flight, **rebase on it before splitting** — do not do this work in parallel without coordinating, or the merge will be miserable.
+* The class has stateful internals (`runId`, `runDir`, `summaryFile`, `heartbeatTimer`, `frameStack`, `asyncIndices`, `env`, `cwd`, `graph`, `mockBodies`). The split must keep state in the class and move stateless helpers out, or pass state explicitly into the extracted modules. Do not invent a second source of truth.
+* Free helpers above the class (`interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH`, `sanitizeName`, `nowIso`) — all stateless. Safe to extract.
+* Methods that are pure event emission (`emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`) all call `appendRunSummaryLine` and `process.stderr.write`. They depend on the class only for `runId`, `summaryFile`, and `getAsyncIndices()`. Can move to a module that takes those as constructor args.
+* Mock execution methods (`executeMockBodyDef`, `executeMockShellBody`) are largely self-contained and could move to a sibling module.
+
+**Scope**
+
+Extract three new sibling modules under `src/runtime/kernel/`:
+
+* **`runtime-arg-parser.ts`** — every stateless free helper currently above the `NodeWorkflowRuntime` class:
+  - `interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `sanitizeName`, `nowIso`
+  - The `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH` constants
+  - The `ParsedArgToken`, `PromptSchemaField` types if they are not used elsewhere in the class
+  - **Required**: extracted helpers must have unit tests (some already do indirectly via runtime tests; new direct tests live in `runtime-arg-parser.test.ts`).
+* **`runtime-event-emitter.ts`** — a small class `RuntimeEventEmitter` constructed with `{ runId, asyncIndicesGetter, env }`, exposing `emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`. The runtime constructs one and delegates. No more direct `process.stderr.write(__JAIPH_EVENT__ ...)` scattered through the runtime.
+* **`runtime-mock.ts`** — `executeMockBodyDef` and `executeMockShellBody` move here as exported functions taking `{ ref, args, env, cwd, executeStepsBack }` (the last is a callback so the mock can dispatch back into the runtime for `kind: "steps"` mocks). Removes the `require("node:child_process")` and `require("node:fs")` calls that currently shadow ESM imports inside the class body — that is a code smell that should die in this task.
+
+After the split, `node-workflow-runtime.ts` keeps only:
+* The `NodeWorkflowRuntime` class
+* Workflow/step orchestration (`runDefault`, `runNamedWorkflow`, `executeSteps`, `executeStep`, frame and scope management)
+* The async-handle bookkeeping (`getAsyncIndices`, `getFrameStack`)
+* Heartbeat (`startHeartbeat`, `stopHeartbeat`, `writeHeartbeat`)
+
+Target size for `node-workflow-runtime.ts` after split: ~900–1100 LoC. Still large, but a single coherent concern (the orchestrator).
+
+**Non-goals**
+
+* Do not change behavior. Every existing test must still pass without modification.
+* Do not redesign the event format, the mock contract, or the arg-parser's accepted syntax. This is a relocation task only.
+* Do not split further than the three new modules listed. Over-decomposition is its own problem; this task is calibrated for one round of splitting.
+* Do not touch `node-workflow-runner.ts` (the CLI shim) or `run-step-exec.ts` (subprocess plumbing) — those are already correctly sized and out of scope.
+
+**Acceptance criteria**
+
+* `src/runtime/kernel/node-workflow-runtime.ts` is between 900 and 1100 LoC after the split.
+* `src/runtime/kernel/runtime-arg-parser.ts`, `runtime-event-emitter.ts`, `runtime-mock.ts` exist and own their respective concerns.
+* `runtime-arg-parser.test.ts` exists with direct unit tests for the extracted helpers.
+* `npm test` passes with no test changes other than possibly importing helpers from their new location.
+* No `require("node:...")` calls inside class methods (they are replaced by top-of-file `import` statements as part of the mock extraction).
+* The new modules have no circular imports back into `node-workflow-runtime.ts`. Dependency direction is one-way: orchestrator → helpers/emitter/mock.
+
+***

From f055601125cf62668adeaa82fe4898751b82cd30 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 14:26:55 +0200
Subject: [PATCH 12/38] Feat: Add recover loop semantics for non-isolated run

Introduce `recover` as a first-class repair-and-retry primitive for
`run` steps, distinct from the existing one-shot `catch`. When a run
step fails, the recover block binds the error, executes a repair body,
and retries the step in a loop until it succeeds or the retry limit
(default 10, configurable via `config`) is exhausted.

Covers parser, formatter, validator, runtime, e2e acceptance test, and
docs-site syntax highlighting for the new keyword.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                |   1 +
 QUEUE.md                                    |  39 -----
 docs/assets/js/main.js                      |   1 +
 docs/configuration.md                       |   2 +
 docs/grammar.md                             |  52 ++++++-
 docs/index.html                             |  86 ++++++-----
 docs/jaiph-skill.md                         |  14 +-
 docs/language.md                            |  47 +++++-
 e2e/test_all.sh                             |   1 +
 e2e/tests/130_run_recover_loop.sh           | 113 ++++++++++++++
 src/cli/run/progress.ts                     |   7 +-
 src/format/emit.test.ts                     |  37 +++++
 src/format/emit.ts                          |  18 ++-
 src/parse/metadata.ts                       |   7 +
 src/parse/parse-steps.test.ts               | 117 ++++++++++++++-
 src/parse/steps.ts                          | 157 ++++++++++++++++++++
 src/parse/workflow-brace.ts                 |   7 +-
 src/parse/workflows.ts                      |   9 +-
 src/runtime/kernel/node-workflow-runtime.ts |  27 ++++
 src/transpile/validate.ts                   |  12 ++
 src/types.ts                                |   6 +-
 test/sample-build.test.ts                   | 154 +++++++++++++++++++
 22 files changed, 822 insertions(+), 92 deletions(-)
 create mode 100755 e2e/tests/130_run_recover_loop.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ccd2e664..f6bb9448 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`).
 - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`).
diff --git a/QUEUE.md b/QUEUE.md
index 68afd335..f4b47f6f 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,45 +13,6 @@ Process rules:
 
 ***
 
-## Language/Runtime — add `recover` loop semantics for non-isolated `run` #dev-ready
-
-**Goal**
-Add `recover` as a first-class repair-and-retry primitive distinct from `catch`. Ship for non-isolated, non-async `run` first. Async composition lands in the next task, not here.
-
-**Scope**
-
-* Keep existing `catch` behavior as one-attempt try/catch.
-* Add:
-
-  ```jh
-  run sth() recover(err) {
-    ...
-  }
-  ```
-
-  with loop semantics: try, bind failure, run repair block, retry, stop on success or retry-limit exhaustion.
-* Add a small explicit retry limit (default 10) with config override.
-* Keep the runtime behavior simple and observable; do not introduce speculative control-flow abstractions.
-
-**Required tests**
-
-* Parser / formatter / validation coverage for `recover`.
-* Runtime tests for:
-  - success on first attempt
-  - one or more repair loops before success
-  - retry limit exhaustion
-  - retry limit configured via `config`
-* At least one acceptance test using `recover` to repair and retry a failing run.
-
-**Acceptance criteria**
-
-* `recover` is distinct from `catch`.
-* The retry limit is explicit and configurable.
-* Tests prove loop behavior and limit handling.
-* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`, the `STATEMENT_KEYWORDS` set and any keyword-flow special cases) recognizes `recover` as a keyword. Any `.jh` code block on the docs site that uses `recover` renders with the keyword colored.
-
-***
-
 ## Runtime — spec and implement `Handle<T>` for `run async`, including `recover` composition #dev-ready
 
 **Goal**
diff --git a/docs/assets/js/main.js b/docs/assets/js/main.js
index 79a707ae..13beb83b 100644
--- a/docs/assets/js/main.js
+++ b/docs/assets/js/main.js
@@ -21,6 +21,7 @@
         "test",
         "ensure",
         "catch",
+        "recover",
         "run",
         "prompt",
         "returns",
diff --git a/docs/configuration.md b/docs/configuration.md
index 5a1b2891..56fc5266 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -136,6 +136,7 @@ These control runtime behavior unrelated to the agent.
 | `run.logs_dir` | string | `.jaiph/runs` | `JAIPH_RUNS_DIR` | Step log directory. Relative paths are joined with the workspace root; absolute paths are used as-is. |
 | `run.debug` | boolean | `false` | `JAIPH_DEBUG` | Enables debug tracing for the run. |
 | `run.inbox_parallel` | boolean | `false` | `JAIPH_INBOX_PARALLEL` | Dispatch inbox route targets concurrently. See [Inbox — Parallel dispatch](inbox.md#parallel-dispatch). |
+| `run.recover_limit` | integer | `10` | _(no env override)_ | Maximum number of retry attempts for `run … recover` loops before the step fails. See [Language — `recover`](language.md#recover--repair-and-retry-loop). |
 
 ### Module keys
 
@@ -323,6 +324,7 @@ Quick reference for all in-file keys and their environment variable equivalents:
 | `run.logs_dir` | `JAIPH_RUNS_DIR` |
 | `run.debug` | `JAIPH_DEBUG` |
 | `run.inbox_parallel` | `JAIPH_INBOX_PARALLEL` |
+| `run.recover_limit` | _(no env override)_ |
 | `runtime.docker_enabled` | `JAIPH_DOCKER_ENABLED` |
 | `runtime.docker_image` | `JAIPH_DOCKER_IMAGE` |
 | `runtime.docker_network` | `JAIPH_DOCKER_NETWORK` |
diff --git a/docs/grammar.md b/docs/grammar.md
index 521355d8..d3287bec 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -42,7 +42,7 @@ The compiler enforces these boundaries at every call site. Using a script where
 
 Jaiph enforces a strict boundary between orchestration and execution. Workflows and rules contain only Jaiph steps. Bash lives in `script` bodies.
 
-- **Workflows** — Named sequences of Jaiph steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox `send` (`channel <- …`), `match`, `if`, `run async`, `ensure … catch`, and `run … catch`. Any line that is not a recognized step is a parse error — extract bash to a `script` and call it with `run`.
+- **Workflows** — Named sequences of Jaiph steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox `send` (`channel <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`, and `run … recover`. Any line that is not a recognized step is a parse error — extract bash to a `script` and call it with `run`.
 
 - **Rules** — Named blocks of structured Jaiph steps: `ensure` (other rules), `run` (scripts only — not workflows), `const`, `match`, `if`, `fail`, `log`/`logerr`, `return "…"`, `ensure … catch`, `run … catch`. Rules cannot use `prompt`, inbox send/route, or `run async`.
 
@@ -392,6 +392,46 @@ Syntax rules:
 - All call arguments must appear inside the parentheses **before** `catch`.
 - `catch` must be followed by at least one recovery step after the bindings.
 
+### `run … recover` — Repair-and-Retry Loop
+
+`recover` adds loop semantics to a `run` step. Unlike `catch` (which runs the recovery body once and stops), `recover` retries the target after each repair attempt until it succeeds or the retry limit is exhausted.
+
+```jaiph
+# Single-statement recover
+run deploy() recover(err) run fix_deploy()
+
+# Block recover
+run deploy(env) recover(err) {
+  log "Deploy failed: ${err}"
+  run auto_repair(env)
+}
+```
+
+**Loop behavior:**
+
+1. Execute the `run` target.
+2. If it succeeds, continue (the `recover` body never runs).
+3. If it fails, bind merged stdout+stderr to the binding (e.g. `err`), execute the repair body, then go to step 1.
+4. If the retry limit is reached and the target still fails, the step fails with the last error.
+
+**Retry limit:** Default is **10**. Override per-module with `run.recover_limit`:
+
+```jaiph
+config {
+  run.recover_limit = 3
+}
+```
+
+**Bindings** follow the same rules as `catch`:
+- Exactly one binding is required. The binding receives merged stdout+stderr from the failed execution.
+
+Syntax rules:
+- `recover` must be followed by `(<name>)` — bare `recover` or `recover {` without bindings is `E_PARSE`.
+- All call arguments must appear inside the parentheses **before** `recover`.
+- `recover` must be followed by at least one recovery step after the bindings.
+- `recover` and `catch` are mutually exclusive on the same `run` step.
+- `recover` is not supported on `ensure` or `run async` steps.
+
 ### `prompt` — Agent Interaction
 
 Sends text to the configured agent backend. The prompt body can be supplied in three forms: a single-line string literal, a bare identifier referencing an existing binding, or a triple-quoted multiline block.
@@ -827,7 +867,7 @@ workflow_config = config_block ;
   (* optional per-workflow override; must appear before steps;
      only agent.* and run.* keys allowed; runtime.* and module.* yield E_PARSE *)
 
-workflow_step   = ensure_stmt | run_stmt | run_catch_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt
+workflow_step   = ensure_stmt | run_stmt | run_catch_stmt | run_recover_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt
                 | const_decl_step | return_stmt
                 | fail_stmt | log_stmt | logerr_stmt | send_stmt
                 | match_stmt | if_stmt | comment_line ;
@@ -867,6 +907,7 @@ logerr_stmt     = "logerr" ( double_quoted_string | triple_quoted_block | IDENT
 
 ensure_stmt     = "ensure" call_ref [ "catch" catch_bindings catch_body ] ;
 run_catch_stmt  = "run" call_ref "catch" catch_bindings catch_body ;
+run_recover_stmt = "run" call_ref "recover" recover_bindings recover_body ;
 run_stmt        = "run" ( call_ref | inline_script ) ;
 call_ref        = REF "(" [ call_args ] ")" ;  (* parentheses always required *)
 call_arg        = double_quoted_string | IDENT | "${" IDENT "}"
@@ -881,7 +922,9 @@ returns_schema  = "returns" double_quoted_string ;
 
 catch_bindings  = "(" IDENT ")" ;  (* failure payload *)
 catch_body      = single_workflow_stmt | "{" { workflow_step } "}" ;
-single_workflow_stmt = ensure_stmt | run_stmt | run_catch_stmt | prompt_stmt | prompt_capture_stmt
+recover_bindings = "(" IDENT ")" ;  (* failure payload — same as catch *)
+recover_body    = single_workflow_stmt | "{" { workflow_step } "}" ;
+single_workflow_stmt = ensure_stmt | run_stmt | run_catch_stmt | run_recover_stmt | prompt_stmt | prompt_capture_stmt
                 | const_decl_step
                 | return_stmt | fail_stmt | log_stmt | logerr_stmt
                 | send_stmt ;
@@ -904,7 +947,7 @@ Validation rules:
 4. **Unified namespace:** channels, rules, workflows, scripts, script import aliases, and top-level `const` share one namespace per module.
 5. `ensure` must target a rule. `run` in a workflow targets a workflow or script. `run` in a rule targets a script only. These rules also apply to `return run` and `return ensure` forms.
 6. Channel references in `send` must resolve to declared channels. Route targets on channel declarations must be workflows with exactly 3 parameters. Route declarations inside workflow bodies are rejected at parse time.
-7. `ensure … catch` and `run … catch` argument ordering: all arguments inside parentheses before `catch`.
+7. `ensure … catch`, `run … catch`, and `run … recover` argument ordering: all arguments inside parentheses before `catch`/`recover`.
 8. Shell redirection (`>`, `|`, `&`) after `run`/`ensure` is rejected — use a script.
 9. **Type crossing:** `string` and `script` are non-interchangeable primitive types (see [Types](#types)). `prompt` rejects script names; `run` rejects string consts; assigning a script to a `const` or interpolating a script name with `${…}` is rejected. Each crossing produces an actionable `E_VALIDATE` message.
 10. **Explicit nested managed calls:** Bare call-like forms in argument position (`run foo(bar())`, `run foo(rule_bar())`) are rejected — add the missing `run` or `ensure` keyword. Bare inline script calls in arguments (`run foo(\`echo aaa\`())`) are also rejected — add `run`. Valid forms: `run foo(run bar())`, `run foo(ensure rule_bar())`, `run foo(run \`echo aaa\`())`.
@@ -923,6 +966,7 @@ At runtime, the Node workflow runtime interprets the AST directly:
 - **Script isolation:** Managed subprocesses with only essential variables. Module-scoped variables not visible.
 - **Prompt + schema:** JSON extraction and schema validation via the JS kernel. Exit codes: 0=ok, 1=parse error, 2=missing field, 3=type mismatch.
 - **ensure/run … catch:** On failure, the recovery body runs **once**. There is no retry loop. Requires explicit bindings: `catch (failure) { … }`. The binding gets the merged stdout+stderr from the failed execution.
+- **run … recover:** Repair-and-retry loop. On failure, the binding gets merged stdout+stderr, the repair body runs, and the target is retried. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. Requires explicit bindings: `recover(err) { … }`.
 - **Recursion safety:** There is a hard recursion depth limit of 256. Exceeding it produces a runtime error.
 - **Assignment capture:** Rules and workflows use explicit `return "…"`. Scripts use stdout.
 - **`run async`:** Promise-based concurrency. Implicit join via `Promise.allSettled` before workflow returns. Failures aggregated.
diff --git a/docs/index.html b/docs/index.html
index 5c7793bb..9c7c629a 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -269,53 +269,55 @@ <h2 id="samples">Samples</h2>
 
                 <div class="code-tab-panel" data-panel="recover-loop-jh" data-sample="recover-loop"
                     data-sample-file="recover_loop.jh">
-                    <p>The <code>ensure … catch</code> pattern checks a rule and, on failure,
-                        runs a recovery block. The <code>catch (failure)</code> binding captures
-                        the merged stdout+stderr from the failed check.
-                        Recovery runs <strong>once</strong> — for retries, the workflow calls itself
-                        recursively (<code>run default()</code>).</p>
+                    <p>The <code>run … recover</code> pattern is a first-class repair-and-retry loop.
+                        When the target fails, the <code>recover(err)</code> body runs, then the target
+                        is retried automatically. The loop stops on success or when the retry limit
+                        (default 10, configurable via <code>run.recover_limit</code>) is exhausted.</p>
                     <pre><code class="language-jaiph" data-sample-source>#!/usr/bin/env jaiph
 
-# Recursive recovery: when a check fails, prompt an agent to fix it,
-# then retry via run default(). Jaiph CI uses the same pattern to
-# auto-fix failing tests — see .jaiph/ensure_ci_passes.jh
-script check_report = `test -f report.txt`
+# recover loop: when check() fails, fix() repairs the problem,
+# and Jaiph automatically retries check(). No manual recursion needed.
+script check_gate = `test -f .gate_passed`
 
-rule report_exists() {
-  run check_report()
+workflow check() {
+  run check_gate()
+}
+
+script do_fix = `touch .gate_passed`
+
+workflow fix() {
+  run do_fix()
 }
 
 workflow default() {
-  ensure report_exists() catch (failure) {
-    prompt "report.txt is missing. Create it with a short dummy summary."
-    run default()
+  run check() recover(err) {
+    run fix()
   }
 }</code></pre>
-                    <p>In the run below, <code>report_exists</code> fails once. The agent creates
-                        <code>report.txt</code>, and the recursive <code>run default()</code> retries
-                        successfully.
+                    <p>In the run below, <code>check()</code> fails once. The recover body runs
+                        <code>fix()</code>, and Jaiph retries <code>check()</code> which now succeeds.
                     </p>
                     <pre><code class="jaiph-run" data-sample-output="run"><span class="run-command">➜  ./recover_loop.jh</span>
 
 Jaiph: Running recover_loop.jh
 
 <span class="run-keyword">workflow</span> default
-  <span class="run-marker">▸</span> <span class="run-keyword">rule</span> report_exists
-  <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">script</span> check_report
-  <span class="run-marker">·</span>   <span class="run-fail">✗ script check_report (0s)</span>
-  <span class="run-fail">✗ rule report_exists (0s)</span>
-  <span class="run-marker">▸</span> <span class="run-keyword">prompt</span> cursor "report.txt is missin..."
-  <span class="run-pass">✓</span> <span class="run-time">prompt cursor (5s)</span>
-  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> default
-  <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">rule</span> report_exists
-  <span class="run-marker">·</span>   <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">script</span> check_report
-  <span class="run-marker">·</span>   <span class="run-marker">·</span>   <span class="run-pass">✓</span> <span class="run-time">script check_report (0s)</span>
-  <span class="run-marker">·</span>   <span class="run-pass">✓</span> <span class="run-time">rule report_exists (0s)</span>
-  <span class="run-pass">✓</span> <span class="run-time">workflow default (0.1s)</span>
-
-<span class="run-pass">✓ PASS</span> <span class="run-keyword">workflow</span> default <span class="run-time">(5.5s)</span></code></pre>
-                    <p>Jaiph's own CI uses this same pattern to auto-fix failing tests — see
-                        <a href="https://github.com/jaiphlang/jaiph/blob/main/.jaiph/ensure_ci_passes.jh"><code>.jaiph/ensure_ci_passes.jh</code></a>.</p>
+  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> check
+  <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">script</span> check_gate
+  <span class="run-marker">·</span>   <span class="run-fail">✗ script check_gate (0s)</span>
+  <span class="run-fail">✗ workflow check (0s)</span>
+  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> fix
+  <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">script</span> do_fix
+  <span class="run-marker">·</span>   <span class="run-pass">✓</span> <span class="run-time">script do_fix (0s)</span>
+  <span class="run-pass">✓</span> <span class="run-time">workflow fix (0s)</span>
+  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> check
+  <span class="run-marker">·</span>   <span class="run-marker">▸</span> <span class="run-keyword">script</span> check_gate
+  <span class="run-marker">·</span>   <span class="run-pass">✓</span> <span class="run-time">script check_gate (0s)</span>
+  <span class="run-pass">✓</span> <span class="run-time">workflow check (0s)</span>
+
+<span class="run-pass">✓ PASS</span> <span class="run-keyword">workflow</span> default <span class="run-time">(0.5s)</span></code></pre>
+                    <p>For one-shot failure handling without retry, use <code>catch</code> instead. See
+                        <a href="/language#recover--repair-and-retry-loop">Language — recover</a>.</p>
                 </div>
 
                 <div class="code-tab-panel" data-panel="inbox-pipeline-jh" data-sample="agent-inbox"
@@ -456,9 +458,10 @@ <h3>Language</h3>
                     See <a href="inbox">Inbox &amp; Dispatch</a>.
                 </p>
                 <p><strong>Failure recovery.</strong> <code>ensure … catch</code> and <code>run … catch</code>
-                    handle failures inline: when a rule or script fails, the recovery body runs <strong>once</strong>
-                    (like a <code>catch</code> clause). For retries, use explicit recursion. Both forms work in workflows
-                    and rules. See <a href="grammar">Grammar</a>.
+                    handle failures inline: when a rule or script fails, the recovery body runs <strong>once</strong>.
+                    For automatic repair-and-retry, use <code>run … recover</code> — a loop that retries
+                    the target after each repair attempt (configurable limit, default 10). Both <code>catch</code>
+                    and <code>recover</code> work in workflows. See <a href="grammar">Grammar</a>.
                 </p>
 
                 <h3>Runtime</h3>
@@ -596,8 +599,15 @@ <h3>Jaiph workflows</h3>
                     </dt>
                     <dd>Failure recovery: when the target fails, the recovery body runs <strong>once</strong>
                         (like a <code>catch</code> clause). <code>catch</code> requires explicit bindings
-                        in parentheses. Works in both workflows and rules. For retries, use explicit
-                        recursion in the recovery body.
+                        in parentheses. Works in both workflows and rules.
+                    </dd>
+
+                    <dt><code>run ref() recover (err) { … }</code></dt>
+                    <dd>Repair-and-retry loop: when the target fails, the repair body runs and the target
+                        is retried automatically. Stops on success or when the retry limit is exhausted
+                        (default 10, configurable via <code>run.recover_limit</code>). <code>recover</code>
+                        requires explicit bindings. Workflows only. See
+                        <a href="language#recover--repair-and-retry-loop">Language</a>.
                     </dd>
 
                     <dt><code>match var { "lit" =&gt; …, /re/ =&gt; …, _ =&gt; … }</code></dt>
diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md
index f170b6ef..9d6964e8 100644
--- a/docs/jaiph-skill.md
+++ b/docs/jaiph-skill.md
@@ -26,11 +26,11 @@ The **JS kernel** (`src/runtime/kernel/`) handles **prompt** execution, **manage
 **Concepts:**
 
 - **Rules** — Structured checks: `ensure` (other **rules** only), `run` (**scripts** only — not workflows), `const`, `match`, `if`, `fail`, `log`/`logerr`, `return "…"` / `return run script()` / `return ensure rule()`, `ensure … catch`, `run … catch`. No raw shell lines, `prompt`, inbox send/route, or `run async`. Under `jaiph run`, rule bodies are executed **in-process** by the Node runtime; when a rule runs a **script**, that script is a normal managed subprocess (same as scripts from workflows) — see [Sandboxing](sandboxing.md).
-- **Workflows** — Named sequences of **Jaiph-only** steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox **send** (`channel_ref <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`. Route declarations (`->`) belong at the top level on `channel` declarations, **not** inside workflow bodies — a `->` inside a body is a parse error. Unrecognized lines are errors — put bash in **`script`** definitions and call with `run`.
+- **Workflows** — Named sequences of **Jaiph-only** steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox **send** (`channel_ref <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`, `run … recover`. Route declarations (`->`) belong at the top level on `channel` declarations, **not** inside workflow bodies — a `->` inside a body is a parse error. Unrecognized lines are errors — put bash in **`script`** definitions and call with `run`.
 - **Scripts** — Top-level **`script`** definitions are **bash (or shebang interpreter) source**, not Jaiph orchestration. Defined with `` script name = `body` `` (single-line backtick) or `` script name = ```[lang] ... ``` `` (fenced block). Double-quoted string bodies (`script name = "body"`) and bare identifier bodies (`script name = varName`) are **removed** — both produce parse errors with guidance to use backtick delimiters. The compiler treats all script bodies as **opaque text**: it does not parse lines as Jaiph steps, reject keywords, strip quotes, or validate cross-script calls. This means embedded `node -e` heredocs, inline Python, `const` assignments in JS, and any other valid shell construct compile without interference. Jaiph interpolation (`${...}`) is **forbidden** in **single-line backtick** script bodies — use `$1`, `$2` positional arguments to pass data from orchestration to scripts. In **fenced** (triple-backtick) blocks, `${...}` is passed through to the shell as standard parameter expansion (`${VAR}`, `${VAR:-default}`, etc.). A single-backtick body containing a newline is a hard parse error — use a fenced block for multi-line scripts. Use `return N` / `return $?` for exit status and **stdout** (`echo` / `printf`) for string data to callers. From a **workflow** or **rule**, call with **`run fn()`**. Can be exported (`export script name = ...`) for use by importing modules. Cannot be used with `ensure`, are not valid inbox route targets, and must not be invoked through `$(...)` or as a bare shell step. **Polyglot scripts:** use a fence lang tag (`` ```<tag> ``) to select an interpreter — the tag maps directly to `#!/usr/bin/env <tag>`. Any tag is valid (no hardcoded allowlist). For example: `` ```node ``, `` ```python3 ``, `` ```ruby ``, `` ```lua ``. Alternatively, if no fence tag is present, the first non-empty body line may start with `#!` (e.g. `#!/usr/bin/env lua`), which becomes the script's shebang and the body is emitted verbatim (you cannot combine a fence tag with a manual shebang — that is an error). Without either, `#!/usr/bin/env bash` is used and the emitter applies only lightweight bash-specific transforms (`return` normalization, `local`/`export`/`readonly` spacing, import alias resolution). Scripts are extracted to a `scripts/` directory under the run output tree (`jaiph run --target <dir>` sets that tree; without `--target` the CLI uses a temporary directory) and executed via **`JAIPH_SCRIPTS`**. **Inline scripts:** For trivial one-off commands, use `` run `body`(args) `` or `` run ```lang...body...```(args) `` directly in a workflow or rule step instead of declaring a named `script` definition. The body (single backtick for one-liners or triple backtick for multi-line) comes before the parentheses; optional comma-separated arguments go inside the parentheses: `` run `echo $1`("hello") ``. Fenced blocks support lang tags for polyglot inline scripts: `` run ```python3 ... ```() ``. Capture forms: `` const x = run `echo val`() `` and `` const x = run ```...```() ``. The old `run script() "body"` form is **removed** — use the backtick forms instead. Inline scripts use deterministic hash-based artifact names (`__inline_<hash>`) and run with the same isolation as named scripts. `run async` with inline scripts is not supported.
 - **Channels** — Top-level `channel <name> [-> workflow, ...]` declarations with optional inline routing; **send** uses `channel_ref <- …`. Routes are declared on the channel declaration, not inside workflow bodies (see [Inbox & Dispatch](inbox.md)). Channel names share the per-module namespace with rules, workflows, scripts, and module-scoped `local` / `const` variables.
 
-Step semantics (`ensure`, `run`, `prompt`, `catch`, `match`, `if`, `log`, `fail`, `return`, `send`, `run async`) are detailed in the **Steps** section below.
+Step semantics (`ensure`, `run`, `prompt`, `catch`, `recover`, `match`, `if`, `log`, `fail`, `return`, `send`, `run async`) are detailed in the **Steps** section below.
 
 **Audience:** Agents that produce or edit `.jh` files.
 
@@ -95,7 +95,7 @@ Prefer composable modules over one large file.
 - **Module-scoped variables:** `local name = value` or `const name = value` (same value forms). Prefer **`const`** for new files. Values can be single-line `"..."` strings, triple-quoted `"""..."""` multiline strings, or bare tokens. A double-quoted string that spans multiple lines is rejected — use `"""..."""` instead. Accessible as `${name}` inside orchestration strings in the same module. Names share the unified namespace with channels, rules, workflows, and scripts — duplicates are `E_PARSE`. Not exportable; module-scoped only.
 - **Steps:**
   - **ensure** — `ensure ref` or `ensure ref([args...])` runs a rule (local or `alias.rule_name`). **Parentheses are optional when passing zero arguments** — `ensure check` is equivalent to `ensure check()`. When arguments are present, parentheses are required with comma-separated expressions. **Bare identifier arguments** are supported and preferred: `ensure check(status)` is equivalent to `ensure check("${status}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead. Optionally `ensure ref([args]) catch (<name>) <body>` or `ensure ref([args]) catch (<name>, <attempt>) <body>`: the recovery body runs **once** on failure (like a catch clause). There is no retry loop — for retries, use explicit recursion. The first binding (e.g. `failure`) receives the full merged stdout+stderr from the failed rule execution, including output from nested scripts and rules. The optional second binding (e.g. `attempt`) receives the attempt number (always `"1"`). Full output still lives in step **`.out` / `.err`** artifacts. If the failure binding is empty for your rule, persist diagnostics before prompting or assert non-empty. Works in both workflows and rules.
-  - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<name>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above.
+  - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<name>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Optionally `run ref([args]) recover (<name>) <body>`: repair-and-retry loop — on failure, binds error output, runs the repair body, and retries the target. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. `recover` and `catch` are mutually exclusive on the same step. Workflows only. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above.
   - **log** — `log "message"` writes the expanded message to **stdout** and emits a **`LOG`** event; the CLI shows it in the progress tree at the current depth. Double-quoted string; `${identifier}` interpolation works at runtime. For multiline messages, use triple quotes: `log """..."""`. **Bare identifier form:** `log foo` (no quotes) expands to `log "${foo}"` — the variable's value is logged. Works with `const`, capture, and named parameters. **Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` execute a managed call and inline the result (e.g. `log "Got: ${run greet()}"`). Nested inline captures are rejected. **`LOG`** events and `run_summary.jsonl` store the **same** message string (JSON-escaped for the payload). No spinner, no timing — a static annotation. See [CLI Reference](cli.md) for tree formatting. Useful for marking workflow phases (e.g. `log "Starting analysis phase"`).
   - **logerr** — `logerr "message"` is identical to `log` except the message goes to **stderr** and the event type is **`LOGERR`**. In the progress tree, `logerr` lines use a red `!` instead of the dim `ℹ` used by `log`. Same quoting, interpolation, bare identifier, and triple-quote rules as `log` (e.g. `logerr err_msg`, `logerr """..."""`).
   - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (forward syntax) has been removed. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md).
@@ -111,12 +111,18 @@ Prefer composable modules over one large file.
 **Quick reference examples:**
 
 ```jaiph
-# catch — failure handling with retry via recursion
+# catch — one-shot failure handling
 ensure ci_passes() catch (failure) {
   prompt "CI failed — fix the code."
   run deploy(env)
 }
 
+# recover — repair-and-retry loop (retries until success or limit)
+run deploy(env) recover(err) {
+  log "Deploy failed: ${err}"
+  run auto_repair(env)
+}
+
 # match — value branching (statement and expression forms)
 const label = match status {
   "ok" => "success"
diff --git a/docs/language.md b/docs/language.md
index c1a82d16..e9528b76 100644
--- a/docs/language.md
+++ b/docs/language.md
@@ -192,7 +192,7 @@ workflow deploy(env, version) {
 }
 ```
 
-Workflows support all step types: `run`, `ensure`, `prompt`, `const`, `log`, `logerr`, `fail`, `return`, `send`, `match`, `if`, `run async`, and `catch`.
+Workflows support all step types: `run`, `ensure`, `prompt`, `const`, `log`, `logerr`, `fail`, `return`, `send`, `match`, `if`, `run async`, `catch`, and `recover`.
 
 ### Rules
 
@@ -355,6 +355,51 @@ workflow deploy(env) {
 
 Bare `catch` without a binding is a parse error. All call arguments must appear inside parentheses before `catch`.
 
+### `recover` — Repair-and-Retry Loop
+
+`recover` is a first-class retry primitive for `run` steps. Unlike `catch` (which runs the recovery body once), `recover` implements a **loop**: try the target, and if it fails, bind the error, run the repair body, then retry. The loop stops when the target succeeds or when the retry limit is exhausted.
+
+```jaiph
+# Single-statement recovery loop
+run deploy() recover(err) run fix_deploy()
+
+# Block recovery loop
+run deploy(env) recover(err) {
+  log "Deploy failed: ${err}"
+  run auto_repair(env)
+}
+```
+
+**Semantics:**
+
+1. Execute the `run` target.
+2. If it succeeds, continue (the `recover` body never runs).
+3. If it fails, bind merged stdout+stderr to the `recover` binding (e.g. `err`), execute the repair body, then go to step 1.
+4. If the retry limit is reached and the target still fails, the step fails with the last error.
+
+**Retry limit:** The default limit is **10** attempts. Override it per-module with the `run.recover_limit` config key:
+
+```jaiph
+config {
+  run.recover_limit = 3
+}
+
+workflow default() {
+  run flaky_step() recover(err) {
+    log "Retrying after: ${err}"
+    run repair()
+  }
+}
+```
+
+**Capture:** When the target eventually succeeds, `const name = run ref() recover(err) { … }` captures the result (same rules as plain `run` — `return` value for workflows, stdout for scripts).
+
+**Constraints:**
+- `recover` requires exactly one binding: `recover(name)`. Bare `recover` without bindings is a parse error.
+- All call arguments must appear inside parentheses **before** `recover`.
+- `recover` is available on `run` steps in workflows only (not `ensure`, not `run async`).
+- `recover` and `catch` are mutually exclusive on the same step — use one or the other.
+
 ### `prompt` — Agent Interaction
 
 Sends text to the configured agent backend. Three body forms:
diff --git a/e2e/test_all.sh b/e2e/test_all.sh
index b3df284b..847791f2 100755
--- a/e2e/test_all.sh
+++ b/e2e/test_all.sh
@@ -79,6 +79,7 @@ TEST_SCRIPTS=(
   "e2e/tests/126_file_shorthand_routing.sh"
   "e2e/tests/127_cli_edge_cases.sh"
   "e2e/tests/128_examples_format_check.sh"
+  "e2e/tests/130_run_recover_loop.sh"
 )
 
 PASS_COUNT=0
diff --git a/e2e/tests/130_run_recover_loop.sh b/e2e/tests/130_run_recover_loop.sh
new file mode 100755
index 00000000..4d96a217
--- /dev/null
+++ b/e2e/tests/130_run_recover_loop.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "${ROOT_DIR}/e2e/lib/common.sh"
+trap e2e::cleanup EXIT
+
+e2e::prepare_test_env "run_recover_loop"
+TEST_DIR="${JAIPH_E2E_TEST_DIR}"
+
+# === Scenario: recover repairs then retries successfully ===
+e2e::section "recover loop: fail first, repair, pass on retry"
+rm -f "${TEST_DIR}/.gate_passed"
+
+e2e::file "recover_repair.jh" <<'EOF'
+script check_gate = `test -f .gate_passed`
+workflow check() {
+  run check_gate()
+}
+
+script do_fix = `touch .gate_passed`
+workflow fix() {
+  run do_fix()
+}
+
+workflow default() {
+  run check() recover(err) {
+    run fix()
+  }
+}
+EOF
+
+out="$(e2e::run "recover_repair.jh" 2>&1)"
+
+e2e::assert_file_exists "${TEST_DIR}/.gate_passed" "recover body ran (marker created)"
+e2e::expect_stdout "${out}" <<'EOF'
+
+Jaiph: Running recover_repair.jh
+
+workflow default
+  ▸ workflow check
+  ·   ▸ script check_gate
+  ·   ✗ script check_gate (<time>)
+  ✗ workflow check (<time>)
+  ▸ workflow fix
+  ·   ▸ script do_fix
+  ·   ✓ script do_fix (<time>)
+  ✓ workflow fix (<time>)
+  ▸ workflow check
+  ·   ▸ script check_gate
+  ·   ✓ script check_gate (<time>)
+  ✓ workflow check (<time>)
+✓ PASS workflow default (<time>)
+EOF
+e2e::pass "recover loop: repair and retry succeeded"
+
+# === Scenario: recover with retry limit exhaustion ===
+e2e::section "recover loop: retry limit exhaustion"
+
+e2e::file "recover_exhaust.jh" <<'EOF'
+config {
+  run.recover_limit = 2
+}
+
+script always_fail = `exit 1`
+workflow failing() {
+  run always_fail()
+}
+
+workflow default() {
+  run failing() recover(err) {
+    log "repair attempt"
+  }
+}
+EOF
+
+if out_exhaust="$(e2e::run "recover_exhaust.jh" 2>&1)"; then
+  e2e::fail "should have failed after retry limit"
+fi
+
+# nondeterministic timing in nested retry output
+e2e::assert_contains "${out_exhaust}" "FAIL" "workflow fails after retry limit exhaustion"
+
+# === Scenario: recover succeeds on first attempt (no loop) ===
+e2e::section "recover loop: success on first attempt skips body"
+
+e2e::file "recover_pass.jh" <<'EOF'
+script ok_impl = `echo ok`
+workflow ok() {
+  run ok_impl()
+}
+
+workflow default() {
+  run ok() recover(err) {
+    log "should not appear"
+  }
+}
+EOF
+
+out_pass="$(e2e::run "recover_pass.jh" 2>&1)"
+e2e::expect_stdout "${out_pass}" <<'EOF'
+
+Jaiph: Running recover_pass.jh
+
+workflow default
+  ▸ workflow ok
+  ·   ▸ script ok_impl
+  ·   ✓ script ok_impl (<time>)
+  ✓ workflow ok (<time>)
+✓ PASS workflow default (<time>)
+EOF
+e2e::pass "recover: success on first attempt, body never runs"
diff --git a/src/cli/run/progress.ts b/src/cli/run/progress.ts
index 1d000fc8..6746a430 100644
--- a/src/cli/run/progress.ts
+++ b/src/cli/run/progress.ts
@@ -81,7 +81,12 @@ export function collectWorkflowChildren(
       const arr: Array<{ label: string; nested?: string; stepFunc?: string }> = [
         { label: `${asyncPrefix}workflow ${wf}`, nested: wf, stepFunc },
       ];
-      if (s.recover) {
+      if (s.recoverLoop) {
+        const steps = "single" in s.recoverLoop ? [s.recoverLoop.single] : s.recoverLoop.block;
+        for (const r of steps) {
+          arr.push(...stepToItems(r));
+        }
+      } else if (s.recover) {
         const steps = "single" in s.recover ? [s.recover.single] : s.recover.block;
         for (const r of steps) {
           arr.push(...stepToItems(r));
diff --git a/src/format/emit.test.ts b/src/format/emit.test.ts
index 5b9ad334..05c5ae70 100644
--- a/src/format/emit.test.ts
+++ b/src/format/emit.test.ts
@@ -464,4 +464,41 @@ describe("emitModule", () => {
     ].join("\n");
     assert.equal(roundTrip(source), source);
   });
+
+  it("round-trips run with single recover statement", () => {
+    const source = [
+      "workflow default() {",
+      '  run deploy() recover (err) log "fixing"',
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
+
+  it("round-trips run with multiline recover block", () => {
+    const source = [
+      "workflow default() {",
+      "  run deploy() recover (err) {",
+      '    log "fixing"',
+      "    run fix()",
+      "  }",
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
+
+  it("round-trips config with run.recover_limit", () => {
+    const source = [
+      "config {",
+      "  run.recover_limit = 5",
+      "}",
+      "",
+      "workflow default() {",
+      '  log "ok"',
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
 });
diff --git a/src/format/emit.ts b/src/format/emit.ts
index 484488a4..a2649dcd 100644
--- a/src/format/emit.ts
+++ b/src/format/emit.ts
@@ -158,6 +158,9 @@ function emitConfigKeyLines(meta: WorkflowMetadata, key: string, pad: string): s
     case "run.inbox_parallel":
       if (meta.run?.inboxParallel === undefined) return [];
       return [`${pad}run.inbox_parallel = ${meta.run.inboxParallel}`];
+    case "run.recover_limit":
+      if (meta.run?.recoverLimit === undefined) return [];
+      return [`${pad}run.recover_limit = ${meta.run.recoverLimit}`];
     case "runtime.docker_enabled":
       if (meta.runtime?.dockerEnabled === undefined) return [];
       return [`${pad}runtime.docker_enabled = ${meta.runtime.dockerEnabled}`];
@@ -221,6 +224,7 @@ function emitConfig(meta: WorkflowMetadata, pad: string): string {
     if (meta.run.debug !== undefined) lines.push(`${pad}run.debug = ${meta.run.debug}`);
     if (meta.run.logsDir !== undefined) lines.push(`${pad}run.logs_dir = "${meta.run.logsDir}"`);
     if (meta.run.inboxParallel !== undefined) lines.push(`${pad}run.inbox_parallel = ${meta.run.inboxParallel}`);
+    if (meta.run.recoverLimit !== undefined) lines.push(`${pad}run.recover_limit = ${meta.run.recoverLimit}`);
   }
   if (meta.runtime) {
     if (meta.runtime.dockerEnabled !== undefined) lines.push(`${pad}runtime.docker_enabled = ${meta.runtime.dockerEnabled}`);
@@ -543,7 +547,19 @@ function emitStep(step: WorkflowStepDef, pad: string, currentIndent: string): st
       const ref = emitRef(step.workflow, step.args, step.bareIdentifierArgs);
       const capture = step.captureName ? `${step.captureName} = ` : "";
       const asyncPrefix = step.async ? "async " : "";
-      if (step.recover) {
+      if (step.recoverLoop) {
+        const b = step.recoverLoop.bindings;
+        const bindStr = `(${b.failure})`;
+        if ("single" in step.recoverLoop) {
+          const recoverLines = emitStep(step.recoverLoop.single, pad, "");
+          const recoverText = recoverLines.map((l) => l.trim()).join("\n");
+          lines.push(`${ci}${capture}run ${asyncPrefix}${ref} recover ${bindStr} ${recoverText}`);
+        } else {
+          lines.push(`${ci}${capture}run ${asyncPrefix}${ref} recover ${bindStr} {`);
+          lines.push(...emitSteps(step.recoverLoop.block, pad, ci + pad));
+          lines.push(`${ci}}`);
+        }
+      } else if (step.recover) {
         const b = step.recover.bindings;
         const bindStr = `(${b.failure})`;
         if ("single" in step.recover) {
diff --git a/src/parse/metadata.ts b/src/parse/metadata.ts
index 0b100024..663822c4 100644
--- a/src/parse/metadata.ts
+++ b/src/parse/metadata.ts
@@ -12,6 +12,7 @@ const ALLOWED_KEYS = new Set([
   "run.logs_dir",
   "run.debug",
   "run.inbox_parallel",
+  "run.recover_limit",
   "runtime.docker_enabled",
   "runtime.docker_image",
   "runtime.docker_network",
@@ -33,6 +34,7 @@ const KEY_TYPES: Record<string, "string" | "boolean" | "number" | "string[]"> =
   "run.logs_dir": "string",
   "run.debug": "boolean",
   "run.inbox_parallel": "boolean",
+  "run.recover_limit": "number",
   "runtime.docker_enabled": "boolean",
   "runtime.docker_image": "string",
   "runtime.docker_network": "string",
@@ -203,6 +205,11 @@ function assignConfigKey(
       out.run = {};
     }
     out.run.inboxParallel = value as boolean;
+  } else if (key === "run.recover_limit") {
+    if (!out.run) {
+      out.run = {};
+    }
+    out.run.recoverLimit = value as number;
   } else if (key === "runtime.docker_enabled") {
     if (!out.runtime) {
       out.runtime = {};
diff --git a/src/parse/parse-steps.test.ts b/src/parse/parse-steps.test.ts
index dd357a35..895728f7 100644
--- a/src/parse/parse-steps.test.ts
+++ b/src/parse/parse-steps.test.ts
@@ -1,7 +1,7 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import { parsejaiph } from "../parser";
-import { parseEnsureStep } from "./steps";
+import { parseEnsureStep, parseRunRecoverStep } from "./steps";
 
 // === parseEnsureStep: basic ensure without catch ===
 
@@ -284,3 +284,118 @@ test("parsejaiph: workflow with ensure catch and multiline triple-quoted prompt"
     }
   }
 });
+
+// === parseRunRecoverStep: basic recover ===
+
+test("parseRunRecoverStep: returns null when no recover keyword", () => {
+  const lines = ["  run my_workflow()"];
+  const result = parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], "my_workflow()");
+  assert.equal(result, null);
+});
+
+test("parseRunRecoverStep: parses run with single recover statement", () => {
+  const lines = ['  run my_workflow() recover(err) log "repairing"'];
+  const result = parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], 'my_workflow() recover(err) log "repairing"');
+  assert.ok(result);
+  const step = result!.step;
+  assert.equal(step.type, "run");
+  if (step.type === "run") {
+    assert.equal(step.workflow.value, "my_workflow");
+    assert.ok(step.recoverLoop);
+    assert.equal(step.recoverLoop!.bindings.failure, "err");
+    if ("single" in step.recoverLoop!) {
+      assert.equal(step.recoverLoop!.single.type, "log");
+    }
+  }
+});
+
+test("parseRunRecoverStep: parses run with inline recover block", () => {
+  const lines = ['  run fix() recover(e) { log "a"; run patch() }'];
+  const result = parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], 'fix() recover(e) { log "a"; run patch() }');
+  assert.ok(result);
+  const step = result!.step;
+  if (step.type === "run" && step.recoverLoop && "block" in step.recoverLoop) {
+    assert.equal(step.recoverLoop.block.length, 2);
+    assert.equal(step.recoverLoop.block[0].type, "log");
+    assert.equal(step.recoverLoop.block[1].type, "run");
+  }
+});
+
+test("parseRunRecoverStep: parses run with multiline recover block", () => {
+  const lines = [
+    "  run deploy() recover(err) {",
+    '    log "retrying"',
+    "    run cleanup()",
+    "  }",
+  ];
+  const result = parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], "deploy() recover(err) {");
+  assert.ok(result);
+  const step = result!.step;
+  if (step.type === "run" && step.recoverLoop && "block" in step.recoverLoop) {
+    assert.equal(step.recoverLoop.block.length, 2);
+    assert.equal(step.recoverLoop.block[0].type, "log");
+    assert.equal(step.recoverLoop.block[1].type, "run");
+  }
+  assert.equal(result!.nextIdx, 3);
+});
+
+test("parseRunRecoverStep: rejects recover at EOL without body", () => {
+  const lines = ["  run my_workflow() recover"];
+  assert.throws(
+    () => parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], "my_workflow() recover"),
+    /recover requires explicit bindings/,
+  );
+});
+
+test("parseRunRecoverStep: rejects recover without bindings", () => {
+  const lines = ["  run my_workflow() recover {"];
+  assert.throws(
+    () => parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], "my_workflow() recover {"),
+    /recover requires explicit bindings/,
+  );
+});
+
+test("parseRunRecoverStep: rejects recover with two bindings", () => {
+  const lines = ['  run my_workflow() recover(a, b) { log "x" }'];
+  assert.throws(
+    () => parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], 'my_workflow() recover(a, b) { log "x" }'),
+    /recover accepts exactly one binding/,
+  );
+});
+
+test("parseRunRecoverStep: empty recover block throws", () => {
+  const lines = ["  run my_workflow() recover(err) { }"];
+  assert.throws(
+    () => parseRunRecoverStep("test.jh", lines, 0, 1, lines[0], "my_workflow() recover(err) { }"),
+    /recover block must contain at least one statement/,
+  );
+});
+
+// === parsejaiph: full workflow with recover ===
+
+test("parsejaiph: workflow with run recover block", () => {
+  const src = [
+    "workflow deploy() {",
+    '  run setup() recover(err) {',
+    '    log "fixing"',
+    '    run fix()',
+    '  }',
+    "}",
+    "workflow setup() {",
+    '  log "setup"',
+    "}",
+    "workflow fix() {",
+    '  log "fix"',
+    "}",
+    "",
+  ].join("\n");
+  const mod = parsejaiph(src, "recover_test.jh");
+  const w = mod.workflows.find((x) => x.name === "deploy");
+  assert.ok(w);
+  const runStep = w!.steps[0];
+  assert.equal(runStep.type, "run");
+  if (runStep.type === "run") {
+    assert.ok(runStep.recoverLoop);
+    assert.equal(runStep.recover, undefined);
+  }
+});
diff --git a/src/parse/steps.ts b/src/parse/steps.ts
index a7a4427d..fde94167 100644
--- a/src/parse/steps.ts
+++ b/src/parse/steps.ts
@@ -211,6 +211,47 @@ function parseCatchStatement(
         loc: { line: lineNo, col },
       };
     }
+    // Check for run ... recover inside catch/recover blocks
+    const recoverLoopMatch = runBody.match(/ recover(?=[\s(])/);
+    if (recoverLoopMatch) {
+      const recLoopIdx = recoverLoopMatch.index!;
+      const leftPart = runBody.slice(0, recLoopIdx).trim();
+      const rightPart = runBody.slice(recLoopIdx + " recover".length).trimStart();
+      const callPart = parseCallRef(leftPart);
+      if (callPart && !callPart.rest.trim() && rightPart.startsWith("(")) {
+        const closeParen = rightPart.indexOf(")");
+        if (closeParen !== -1) {
+          const bStr = rightPart.slice(1, closeParen).trim();
+          const bParts = bStr.split(",").map((s) => s.trim()).filter(Boolean);
+          if (bParts.length === 1 && /^[A-Za-z_][A-Za-z0-9_]*$/.test(bParts[0])) {
+            const bindings = { failure: bParts[0] };
+            const after = rightPart.slice(closeParen + 1).trim();
+            if (after.startsWith("{") && after.endsWith("}")) {
+              const blockContent = after.slice(1, -1).trim();
+              const stmts = splitCatchStatements(blockContent);
+              const blockSteps = stmts.map((s) => parseCatchStatement(filePath, lineNo, col, s));
+              return {
+                type: "run",
+                workflow: { value: callPart.ref, loc: { line: lineNo, col } },
+                args: callPart.args,
+                ...(callPart.bareIdentifierArgs ? { bareIdentifierArgs: callPart.bareIdentifierArgs } : {}),
+                recoverLoop: { block: blockSteps, bindings },
+              };
+            }
+            if (!after.startsWith("{") && after) {
+              const singleStep = parseCatchStatement(filePath, lineNo, col, after);
+              return {
+                type: "run",
+                workflow: { value: callPart.ref, loc: { line: lineNo, col } },
+                args: callPart.args,
+                ...(callPart.bareIdentifierArgs ? { bareIdentifierArgs: callPart.bareIdentifierArgs } : {}),
+                recoverLoop: { single: singleStep, bindings },
+              };
+            }
+          }
+        }
+      }
+    }
     // Check for run ... catch inside catch blocks
     const recIdx = runBody.indexOf(" catch ");
     if (recIdx !== -1) {
@@ -483,6 +524,122 @@ export function parseEnsureStep(
   return { step: { ...base, recover: { single: singleStep, bindings } }, nextIdx: idx };
 }
 
+/**
+ * Try to parse `run <ref>(args) recover(binding) { ... }` syntax (loop semantics).
+ * Returns null if the run body does not contain ` recover `.
+ */
+export function parseRunRecoverStep(
+  filePath: string,
+  lines: string[],
+  idx: number,
+  innerNo: number,
+  innerRaw: string,
+  runBody: string,
+  captureName?: string,
+): { step: WorkflowStepDef; nextIdx: number } | null {
+  // Match ` recover(`, ` recover `, or ` recover` at end of line
+  const recoverMatch = runBody.match(/ recover(?=[\s(]|$)/);
+  if (!recoverMatch) return null;
+  const recoverIdx = recoverMatch.index!;
+
+  if (/ recover$/.test(runBody)) {
+    const recoverCol = innerRaw.indexOf("recover") + 1;
+    fail(
+      filePath,
+      'recover requires explicit bindings and a body: recover(<name>) { ... }',
+      innerNo,
+      recoverCol,
+    );
+  }
+
+  const left = runBody.slice(0, recoverIdx).trim();
+  const right = runBody.slice(recoverIdx + " recover".length).trimStart();
+  const call = parseCallRef(left);
+  if (!call || call.rest.trim()) return null;
+  const runCol = innerRaw.indexOf("run") + 1;
+  const recoverCol = innerRaw.indexOf("recover") + 1;
+
+  if (!right.startsWith("(")) {
+    fail(
+      filePath,
+      'recover requires explicit bindings: recover(<name>) { ... }',
+      innerNo,
+      recoverCol,
+    );
+  }
+
+  const closeParen = right.indexOf(")");
+  if (closeParen === -1) {
+    fail(filePath, 'unterminated recover bindings: expected ")"', innerNo, recoverCol);
+  }
+  const bindingsStr = right.slice(1, closeParen).trim();
+  const bindingParts = bindingsStr.split(",").map((s) => s.trim()).filter(Boolean);
+  if (bindingParts.length === 0) {
+    fail(filePath, "recover requires exactly one binding: recover(<name>) { ... }", innerNo, recoverCol);
+  }
+  if (bindingParts.length > 1) {
+    fail(filePath, "recover accepts exactly one binding: recover(<name>)", innerNo, recoverCol);
+  }
+  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(bindingParts[0])) {
+    fail(filePath, `invalid recover binding name: "${bindingParts[0]}" — must be a valid identifier`, innerNo, recoverCol);
+  }
+  const bindings = { failure: bindingParts[0] };
+
+  const afterBindings = right.slice(closeParen + 1).trim();
+  const base = {
+    type: "run" as const,
+    workflow: { value: call.ref, loc: { line: innerNo, col: runCol } },
+    args: call.args,
+    ...(call.bareIdentifierArgs ? { bareIdentifierArgs: call.bareIdentifierArgs } : {}),
+    ...(captureName ? { captureName } : {}),
+  };
+
+  if (afterBindings === "{") {
+    let blockLines: string[] = [];
+    let closeLineIdx = -1;
+    let braceDepth = 1;
+    for (let look = idx + 1; look < lines.length; look += 1) {
+      const trimmed = lines[look].trim();
+      if (trimmed.endsWith("{")) braceDepth += 1;
+      if (trimmed === "}") {
+        braceDepth -= 1;
+        if (braceDepth === 0) { closeLineIdx = look; break; }
+      }
+      blockLines.push(trimmed);
+    }
+    if (closeLineIdx === -1) {
+      fail(filePath, 'unterminated recover block, expected "}"', innerNo, recoverCol);
+    }
+    const statements = splitCatchStatements(blockLines.join("\n"));
+    if (statements.length === 0) {
+      fail(filePath, "recover block must contain at least one statement", innerNo, recoverCol);
+    }
+    const blockSteps = statements.map((s) => parseCatchStatement(filePath, innerNo, 1, s));
+    return { step: { ...base, recoverLoop: { block: blockSteps, bindings } }, nextIdx: closeLineIdx };
+  }
+
+  if (afterBindings.startsWith("{")) {
+    const closeBrace = afterBindings.indexOf("}");
+    if (closeBrace === -1) {
+      fail(filePath, 'unterminated recover block, expected "}"', innerNo, recoverCol);
+    }
+    const blockContent = afterBindings.slice(1, closeBrace).trim();
+    const statements = splitCatchStatements(blockContent);
+    if (statements.length === 0) {
+      fail(filePath, "recover block must contain at least one statement", innerNo, recoverCol);
+    }
+    const blockSteps = statements.map((s) => parseCatchStatement(filePath, innerNo, recoverCol, s));
+    return { step: { ...base, recoverLoop: { block: blockSteps, bindings } }, nextIdx: idx };
+  }
+
+  if (!afterBindings) {
+    fail(filePath, "recover requires a body after bindings", innerNo, recoverCol);
+  }
+
+  const singleStep = parseCatchStatement(filePath, innerNo, recoverCol, afterBindings);
+  return { step: { ...base, recoverLoop: { single: singleStep, bindings } }, nextIdx: idx };
+}
+
 /**
  * Try to parse `run <ref>(args) catch (bindings) { ... }` syntax.
  * Returns null if the run body does not contain ` catch `.
diff --git a/src/parse/workflow-brace.ts b/src/parse/workflow-brace.ts
index 120dfeb2..31cf0692 100644
--- a/src/parse/workflow-brace.ts
+++ b/src/parse/workflow-brace.ts
@@ -11,7 +11,7 @@ import {
 import { parseTripleQuoteBlock, tripleQuoteBodyToRaw } from "./triple-quote";
 import { parseConstRhs } from "./const-rhs";
 import { parseAnonymousInlineScript } from "./inline-script";
-import { parseEnsureStep, parseRunCatchStep } from "./steps";
+import { parseEnsureStep, parseRunCatchStep, parseRunRecoverStep } from "./steps";
 import { parsePromptStep } from "./prompt";
 import { parseSendRhs } from "./send-rhs";
 import { parseMatchExpr } from "./match";
@@ -254,6 +254,11 @@ export function parseBlockStatement(
     if (runBody.startsWith("script(") || runBody.startsWith("script (")) {
       fail(filePath, 'inline script syntax has changed: use run `body`(args) instead of run script(args) "body"', innerNo);
     }
+    // Check for run ... recover (loop semantics)
+    const recoverResult = parseRunRecoverStep(filePath, lines, idx, innerNo, innerRaw, runBody);
+    if (recoverResult) {
+      return { step: recoverResult.step, nextIdx: recoverResult.nextIdx + 1 };
+    }
     // Check for run ... catch
     const catchResult = parseRunCatchStep(filePath, lines, idx, innerNo, innerRaw, runBody);
     if (catchResult) {
diff --git a/src/parse/workflows.ts b/src/parse/workflows.ts
index f2bcadeb..d6539056 100644
--- a/src/parse/workflows.ts
+++ b/src/parse/workflows.ts
@@ -15,7 +15,7 @@ import { parseConfigBlock } from "./metadata";
 import { parsePromptStep } from "./prompt";
 import { parseSendRhs } from "./send-rhs";
 import { parseAnonymousInlineScript } from "./inline-script";
-import { parseEnsureStep, parseRunCatchStep } from "./steps";
+import { parseEnsureStep, parseRunCatchStep, parseRunRecoverStep } from "./steps";
 import { parseBraceBlockBody, parseBlockStatement } from "./workflow-brace";
 import { dottedReturnToQuotedString, isBareDottedIdentifierReturn } from "./workflow-return-dotted";
 import { parseMatchExpr } from "./match";
@@ -394,6 +394,13 @@ export function parseWorkflowBlock(
       if (runBody.startsWith("script(") || runBody.startsWith("script (")) {
         fail(filePath, 'inline script syntax has changed: use run `body`(args) instead of run script(args) "body"', innerNo);
       }
+      // Check for run ... recover (loop semantics)
+      const recoverResult = parseRunRecoverStep(filePath, lines, idx, innerNo, innerRaw, runBody);
+      if (recoverResult) {
+        workflow.steps.push(recoverResult.step);
+        idx = recoverResult.nextIdx;
+        continue;
+      }
       // Check for run ... catch
       const catchResult = parseRunCatchStep(filePath, lines, idx, innerNo, innerRaw, runBody);
       if (catchResult) {
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index 2e02a87d..07163273 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -1153,6 +1153,28 @@ export class NodeWorkflowRuntime {
           pendingAsync.push({ ref: step.workflow.value, promise });
           continue;
         }
+        if (step.recoverLoop) {
+          const limit = this.resolveRecoverLimit(scope.filePath);
+          const loopSteps = "single" in step.recoverLoop ? [step.recoverLoop.single] : step.recoverLoop.block;
+          let lastResult = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
+          let attempt = 1;
+          while (lastResult.status !== 0 && attempt <= limit) {
+            const loopVars = new Map(scope.vars);
+            loopVars.set(step.recoverLoop.bindings.failure, `${lastResult.output}${lastResult.error}`);
+            const rr = await this.executeSteps({ ...scope, vars: loopVars }, loopSteps);
+            if (rr.status !== 0 || rr.returnValue !== undefined) return this.mergeStepResult(accOut, accErr, rr);
+            lastResult = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
+            attempt += 1;
+          }
+          if (lastResult.status === 0) {
+            if (step.captureName) {
+              scope.vars.set(step.captureName, lastResult.returnValue ?? lastResult.output.trim());
+            }
+          } else {
+            return this.mergeStepResult(accOut, accErr, lastResult);
+          }
+          continue;
+        }
         const runResult = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
         if (runResult.status === 0) {
           if (step.captureName) {
@@ -1589,6 +1611,11 @@ export class NodeWorkflowRuntime {
     return nextEnv;
   }
 
+  private resolveRecoverLimit(filePath: string): number {
+    const moduleMeta = this.graph.modules.get(filePath)?.ast.metadata;
+    return moduleMeta?.run?.recoverLimit ?? 10;
+  }
+
   private async executeManagedStep(
     kind: "workflow" | "rule" | "script",
     name: string,
diff --git a/src/transpile/validate.ts b/src/transpile/validate.ts
index c197bccf..284d5b7d 100644
--- a/src/transpile/validate.ts
+++ b/src/transpile/validate.ts
@@ -576,6 +576,12 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
           rb.add(s.recover.bindings.failure);
           for (const r of steps) validateRuleStep(r);
         }
+        if (s.recoverLoop) {
+          const steps = "single" in s.recoverLoop ? [s.recoverLoop.single] : s.recoverLoop.block;
+          const rb = new Set<string>();
+          rb.add(s.recoverLoop.bindings.failure);
+          for (const r of steps) validateRuleStep(r);
+        }
         return;
       }
       if (s.type === "fail") {
@@ -899,6 +905,12 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void
           rb.add(s.recover.bindings.failure);
           for (const r of steps) validateStep(r, rb);
         }
+        if (s.recoverLoop) {
+          const steps = "single" in s.recoverLoop ? [s.recoverLoop.single] : s.recoverLoop.block;
+          const rb = new Set<string>();
+          rb.add(s.recoverLoop.bindings.failure);
+          for (const r of steps) validateStep(r, rb);
+        }
         return;
       }
       if (s.type === "prompt") {
diff --git a/src/types.ts b/src/types.ts
index f3696a6c..0abcef2e 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -145,6 +145,10 @@ export type WorkflowStepDef =
       recover?:
         | { single: WorkflowStepDef; bindings: { failure: string } }
         | { block: WorkflowStepDef[]; bindings: { failure: string } };
+      /** When set, retry with repair loop semantics (try → fail → recover body → retry). */
+      recoverLoop?:
+        | { single: WorkflowStepDef; bindings: { failure: string } }
+        | { block: WorkflowStepDef[]; bindings: { failure: string } };
     }
   | {
       type: "prompt";
@@ -305,7 +309,7 @@ export interface WorkflowMetadata {
     cursorFlags?: string;
     claudeFlags?: string;
   };
-  run?: { debug?: boolean; logsDir?: string; inboxParallel?: boolean };
+  run?: { debug?: boolean; logsDir?: string; inboxParallel?: boolean; recoverLimit?: number };
   runtime?: RuntimeConfig;
   module?: { name?: string; version?: string; description?: string };
   /** Preserves `#` lines and assignment order inside `config { }` (formatter). */
diff --git a/test/sample-build.test.ts b/test/sample-build.test.ts
index 2d8439bd..4d2f141e 100644
--- a/test/sample-build.test.ts
+++ b/test/sample-build.test.ts
@@ -2425,3 +2425,157 @@ test("walkTestFiles discovers *.test.jh in directory", () => {
   }
 });
 
+// --- recover loop semantics ---
+
+test("recover: success on first attempt skips recover body", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-recover-pass-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "script ok_impl = `echo ok`",
+        "workflow ok() {",
+        "  run ok_impl()",
+        "}",
+        "workflow default() {",
+        '  run ok() recover(err) {',
+        '    log "should not run"',
+        '  }',
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("recover: one repair loop before success", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-recover-repair-"));
+  try {
+    // Script that fails unless a marker file exists (created by the recover body)
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "script check = `test -f .marker`",
+        "workflow check_wf() {",
+        "  run check()",
+        "}",
+        "script fix_impl = `touch .marker`",
+        "workflow fix() {",
+        "  run fix_impl()",
+        "}",
+        "workflow default() {",
+        "  run check_wf() recover(err) {",
+        "    run fix()",
+        "  }",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+    assert.ok(existsSync(join(root, ".marker")), "repair body should have created marker");
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("recover: retry limit exhaustion fails the workflow", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-recover-exhaust-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "config {",
+        "  run.recover_limit = 2",
+        "}",
+        "",
+        "script always_fail = `exit 1`",
+        "workflow failing() {",
+        "  run always_fail()",
+        "}",
+        "workflow default() {",
+        '  run failing() recover(err) {',
+        '    log "repair attempt"',
+        '  }',
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.notEqual(r.status, 0, "should fail after retry limit exhausted");
+    const combined = r.stdout + r.stderr;
+    assert.match(combined, /FAIL/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("recover: retry limit configurable via config", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-recover-limit-"));
+  try {
+    // Counter file incremented by recover body; check script reads and compares.
+    writeFileSync(join(root, ".counter"), "0");
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "config {",
+        "  run.recover_limit = 3",
+        "}",
+        "",
+        "script count_impl = ```",
+        'count=$(cat .counter)',
+        'if [ "$count" -ge 3 ]; then exit 0; fi',
+        "exit 1",
+        "```",
+        "workflow attempt_wf() {",
+        "  run count_impl()",
+        "}",
+        "script bump_impl = ```",
+        'count=$(cat .counter)',
+        'echo $(( count + 1 )) > .counter',
+        "```",
+        "workflow bump() {",
+        "  run bump_impl()",
+        "}",
+        "workflow default() {",
+        "  run attempt_wf() recover(err) {",
+        "    run bump()",
+        "  }",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+

From 97b3affdbe8a5bc5ced4f2e17de15fdbc3937a62 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 14:54:49 +0200
Subject: [PATCH 13/38] Feat: Implement Handle<T> value model for run async
 with recover composition

Replace the implicit end-of-workflow join with a first-class Handle<T>
that run async returns immediately. Handles resolve transparently on
first non-passthrough read (argument passing to run, interpolation,
comparison, branching) while passthrough operations (assignment, list
storage, unchanged forwarding) leave them unresolved. Workflow exit
implicitly joins any remaining unresolved handles.

Ship recover composition for run async in the same change: the parser
now accepts recover(err) { ... } after run async ref(args), and the
runtime wires up the same retry-limit semantics used by non-async
recover. Includes the spec document, parser/formatter/runtime tests,
updated grammar and language docs, and syntax highlighter support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                |   1 +
 QUEUE.md                                    |  44 ----
 README.md                                   |   2 +-
 docs/assets/js/main.js                      |   1 +
 docs/grammar.md                             |  45 +++-
 docs/index.html                             |  29 ++-
 docs/jaiph-skill.md                         |   2 +-
 docs/language.md                            |  36 ++-
 docs/spec-async-handles.md                  | 111 +++++++++
 e2e/tests/104_run_async.sh                  |  18 +-
 src/format/emit.test.ts                     |  35 +++
 src/format/emit.ts                          |   6 +-
 src/parse/const-rhs.ts                      |  21 ++
 src/parse/parse-run-async.test.ts           | 101 ++++++++-
 src/parse/workflows.ts                      |  16 ++
 src/runtime/kernel/node-workflow-runtime.ts | 204 +++++++++++++++--
 src/types.ts                                |   2 +-
 test/sample-build.test.ts                   | 239 ++++++++++++++++++++
 18 files changed, 805 insertions(+), 108 deletions(-)
 create mode 100644 docs/spec-async-handles.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f6bb9448..20beb513 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Language/Runtime:** `Handle<T>` value model for `run async` — `run async ref(args)` now returns a first-class `Handle<T>` value instead of being a fire-and-forget statement. `T` is the same return type the function would have under a synchronous `run`. Capture is supported: `const h = run async ref()`. The handle resolves to the eventual return value on first non-passthrough read (string interpolation, passing as argument to `run`, comparison, conditional branching, match subject, channel send). Passthrough operations (initial capture into `const`, re-assignment) do not force resolution. Once resolved, the handle is replaced in-place by the resolved string value; subsequent reads return the cached value. Workflow exit implicitly joins all remaining unresolved handles created in that scope — this is not an error and preserves backward compatibility. `recover` composition works with `run async`: `run async foo() recover(err) { … }` — the async branch retries using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). `catch` also works for single-shot recovery. The parser accepts `recover(err) { … }` and `catch(err) { … }` after `run async ref(args)` (the previous attempt silently rejected this with a "trailing content" error). There is no fire-and-forget mode — every `run async` creates a handle tracked by the runtime. No explicit `await` keyword — resolution is implicit on first read or at workflow exit. The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword. Implementation: `Handle<T>` registry in `NodeWorkflowRuntime` (`createHandle`, `resolveHandleResult`, `resolveHandleVar`, `resolveHandlesInInput`), `async` flag on `run_capture` const RHS in `src/types.ts`, async capture parsing in `src/parse/const-rhs.ts`, `recover`/`catch` parsing for `run async` in `src/parse/workflows.ts`, formatter round-trip in `src/format/emit.ts`. Spec: `docs/spec-async-handles.md`. Parser, formatter, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`).
diff --git a/QUEUE.md b/QUEUE.md
index f4b47f6f..e336f7dd 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,50 +13,6 @@ Process rules:
 
 ***
 
-## Runtime — spec and implement `Handle<T>` for `run async`, including `recover` composition #dev-ready
-
-**Goal**
-Replace the current implicit end-of-workflow join with a value-based handle model. `run async foo()` returns a `Handle<T>` immediately. The handle resolves on first non-passthrough read. Workflow exit implicitly joins remaining unresolved handles. Ship `recover` composition for `run async` in the same task.
-
-This task ships **both the written spec and the runtime implementation in one go.** The previous attempt split them across two tasks and the spec drifted from the implementation. Keep them together so the contract and the code land in the same review.
-
-**Scope**
-
-* Write the spec section in `docs/spec-async-handles.md` (a new file) covering:
-  - `Handle<T>` value model: a handle resolves to whatever the called function returned. First non-passthrough read forces resolution. Passthrough (assignment, storage, passing through arguments and returns unchanged) does not.
-  - Workflow exit implicitly joins any remaining unresolved handles; this is not an error.
-  - No fire-and-forget mode.
-  - `recover` composition: `b1 = run async foo() recover(err) { ... }` — handle resolves to either the eventual success value (after the retry loop runs) or the final failure. Same retry-limit semantics as the non-async `recover` task.
-* Replace the implicit end-of-workflow join in `src/runtime/kernel/node-workflow-runtime.ts` with the value-based handle model.
-* `run async ...` returns a `Handle<T>` value. `T` is the same return type the function would have under a non-async `run`.
-* Reads that force resolution: passing as an argument to `run`, string interpolation, comparison, conditional branching, any other access to the underlying value.
-* Passthrough (assignment, storing in a list, passing through `workflow` arguments and returns unchanged) does not force resolution.
-* Workflow exit implicitly joins unresolved handles. This preserves today's end-of-workflow behavior at the boundary.
-* Parser must accept `recover(err) { ... }` after `run async ref(args)`. The previous attempt had the parser silently reject this with a "trailing content" error — that is the failure mode to fix.
-* Preserve async progress/event visibility unless the contract forces an intentional change.
-* Update docs that still describe the old statement-based async model.
-
-**Required tests**
-
-* Parser / formatter / validation coverage for `run async ref(args) recover(err) { ... }`.
-* Runtime tests for handle creation, transparent resolution at first read, and resolution forced by passing a handle into another `run`.
-* Runtime test for the multi-handle join shape: multiple async handles passed into another call all resolve before the callee runs.
-* Runtime test that workflow exit joins unresolved handles without raising an error.
-* Runtime test that handles can be stored in a list and resolved when read.
-* Runtime test for `run async foo() recover(err) { ... }`: handle resolves to the success value after at least one repair loop.
-* Runtime test that the retry-limit semantics are shared with the non-async `recover` task.
-
-**Acceptance criteria**
-
-* `run async ...` returns a first-class handle value.
-* Handle reads force resolution per the spec.
-* Workflow exit implicitly joins remaining handles (no error).
-* `recover` works on `run async ref()`. The parser accepts the form; the runtime implements the spec contract.
-* Spec and implementation ship in the same change set; the spec is internally consistent and self-contained.
-* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword (modifier on `run`) and continues to highlight `recover` correctly when it appears as `recover(err) { ... }` after `run async ref(args)`. A docs code block with `b1 = run async foo() recover(err) { ... }` renders with `run`, `async`, and `recover` all colored.
-
-***
-
 ## Artifacts — runtime mount + `artifacts.jh` lib for publishing files out of the sandbox #dev-ready
 
 **Goal**
diff --git a/README.md b/README.md
index 50310193..ff3109e5 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # ![Jaiph](docs/logo.png)
 
-[jaiph.org](https://jaiph.org) · [Getting Started](docs/getting-started.md) · [Setup](docs/setup.md) · [Libraries](docs/libraries.md) · [Grammar](docs/grammar.md) · [CLI](docs/cli.md) · [Configuration](docs/configuration.md) · [Testing](docs/testing.md) · [Hooks](docs/hooks.md) · [Inbox & Dispatch](docs/inbox.md) · [Sandboxing](docs/sandboxing.md) · [Runtime artifacts](docs/artifacts.md) · [Architecture](docs/architecture.md) · [Contributing](docs/contributing.md)
+[jaiph.org](https://jaiph.org) · [Getting Started](docs/getting-started.md) · [Setup](docs/setup.md) · [Libraries](docs/libraries.md) · [Grammar](docs/grammar.md) · [CLI](docs/cli.md) · [Configuration](docs/configuration.md) · [Testing](docs/testing.md) · [Hooks](docs/hooks.md) · [Inbox & Dispatch](docs/inbox.md) · [Sandboxing](docs/sandboxing.md) · [Runtime artifacts](docs/artifacts.md) · [Async Handles](docs/spec-async-handles.md) · [Architecture](docs/architecture.md) · [Contributing](docs/contributing.md)
 
 ---
 
diff --git a/docs/assets/js/main.js b/docs/assets/js/main.js
index 13beb83b..eb294fc6 100644
--- a/docs/assets/js/main.js
+++ b/docs/assets/js/main.js
@@ -23,6 +23,7 @@
         "catch",
         "recover",
         "run",
+        "async",
         "prompt",
         "returns",
         "mock",
diff --git a/docs/grammar.md b/docs/grammar.md
index d3287bec..f0346ab0 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -311,23 +311,46 @@ date +%s
 - **Backtick** (single-line) inline scripts: Jaiph interpolation (`${...}`) is forbidden — use `$1`, `$2` positional arguments instead.
 - **Fenced block** (triple-backtick) inline scripts: `${...}` is passed through to the shell as standard shell parameter expansion.
 
-### `run async` — Concurrent Execution
+### `run async` — Concurrent Execution with Handles
 
-`run async ref(args)` starts a workflow or script concurrently. All pending async steps are implicitly joined before the enclosing workflow returns. If any fail, the workflow fails with an aggregated error.
+`run async ref(args)` starts a workflow or script concurrently and returns a **`Handle<T>`** immediately. `T` is the same return type the function would have under a synchronous `run`. The handle resolves to the eventual return value on first non-passthrough read.
 
 ```jaiph
 workflow default() {
   run async lib.task_a()
-  run async lib.task_b()
-  # both joined automatically before workflow returns
+  const h = run async lib.task_b()
+  # Reading h forces resolution — blocks until task_b completes
+  log "${h}"
+  # task_a is implicitly joined before workflow returns
 }
 ```
 
+**Resolution semantics:** A handle resolves on first non-passthrough read. Reads that force resolution: string interpolation (`"${h}"`), passing as argument to `run`, comparison/conditional (`if h == "ok"`), match subject, channel send. Passthrough (initial capture, re-assignment) does not force resolution. Once resolved, the handle is replaced by the resolved string value; subsequent reads return the cached value.
+
+**Implicit join:** When a workflow scope exits, all remaining unresolved handles created in that scope are implicitly joined. This is not an error.
+
+**`recover` and `catch` composition:** Both `recover` (retry loop) and `catch` (single-shot) work with `run async`:
+
+```jaiph
+run async foo() recover(err) {
+  log "repairing: ${err}"
+  run fix_it()
+}
+
+run async bar() catch(e) {
+  log "caught: ${e}"
+}
+```
+
+`recover` uses the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`).
+
 In the progress tree, each async branch is prefixed with a subscript number (₁₂₃…) assigned in dispatch order. Nested `run async` inside a child workflow gets its own numbering scope at the child's indent level. See [CLI — Async branch numbering](cli.md#run-progress-and-tree-output) for display details.
 
+See [Spec: Async Handles](spec-async-handles) for the full value model.
+
 Constraints:
 - Workflow-only — rejected in rules with `E_VALIDATE`.
-- Capture is not supported: `name = run async …` is `E_PARSE`.
+- Inline scripts not supported with `run async`.
 - For concurrent bash (pipelines, `&`), put the bash in a script and call with `run`.
 
 ### `ensure` — Execute a Rule
@@ -430,7 +453,7 @@ Syntax rules:
 - All call arguments must appear inside the parentheses **before** `recover`.
 - `recover` must be followed by at least one recovery step after the bindings.
 - `recover` and `catch` are mutually exclusive on the same `run` step.
-- `recover` is not supported on `ensure` or `run async` steps.
+- `recover` is not supported on `ensure` steps. `recover` works with `run async` — see [`run async`](#run-async--concurrent-execution-with-handles).
 
 ### `prompt` — Agent Interaction
 
@@ -799,7 +822,7 @@ Every step produces three distinct outputs — status, value, and logs:
 | `prompt` | prompt exit code | final assistant answer | transcript to artifacts |
 | `log` / `logerr` | always 0 | empty | event + stdout/stderr |
 | `fail` | non-zero (abort) | empty | message to stderr |
-| `run async` | aggregated | not supported (capture rejected) | async step logs to artifacts |
+| `run async` | aggregated | `Handle<T>` — resolves to return value on read | async step logs to artifacts |
 | `const` | same as RHS step | empty (binds local) | n/a |
 
 Key rules:
@@ -876,12 +899,14 @@ workflow_step   = ensure_stmt | run_stmt | run_catch_stmt | run_recover_stmt | r
 
 const_decl_step = "const" IDENT "=" const_rhs ;
 const_rhs       = double_quoted_string | triple_quoted_block | bash_value_expr
-                | "run" ( call_ref | inline_script ) | "ensure" call_ref
+                | "run" ( call_ref | inline_script ) | "run" "async" call_ref
+                | "ensure" call_ref
                 | "prompt" prompt_body [ returns_schema ]
                 | "match" IDENT "{" { match_arm } "}" ;
 
 fail_stmt       = "fail" ( double_quoted_string | triple_quoted_block ) ;
-run_async_stmt  = "run" "async" call_ref ;
+run_async_stmt  = "run" "async" call_ref [ "recover" recover_bindings recover_body ] [ "catch" catch_bindings catch_body ] ;
+run_async_capture = "const" IDENT "=" "run" "async" call_ref ;
 return_stmt     = "return" return_value ;
 return_value    = double_quoted_string | triple_quoted_block | "$" IDENT | "${" IDENT "}"
                 | "run" call_ref | "ensure" call_ref | "match" IDENT "{" { match_arm } "}" ;
@@ -969,5 +994,5 @@ At runtime, the Node workflow runtime interprets the AST directly:
 - **run … recover:** Repair-and-retry loop. On failure, the binding gets merged stdout+stderr, the repair body runs, and the target is retried. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. Requires explicit bindings: `recover(err) { … }`.
 - **Recursion safety:** There is a hard recursion depth limit of 256. Exceeding it produces a runtime error.
 - **Assignment capture:** Rules and workflows use explicit `return "…"`. Scripts use stdout.
-- **`run async`:** Promise-based concurrency. Implicit join via `Promise.allSettled` before workflow returns. Failures aggregated.
+- **`run async`:** Returns a `Handle<T>` value. Handle-based concurrency with implicit resolution on first non-passthrough read and implicit join of unresolved handles at workflow exit. `recover` and `catch` composition supported. Failures aggregated at join.
 - **Channels:** Messages enqueued via `send`, dispatched to route targets at workflow end. Each target must declare exactly 3 parameters; the runtime binds message, channel, and sender to the declared names.
diff --git a/docs/index.html b/docs/index.html
index 9c7c629a..ae15903e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -374,8 +374,9 @@ <h2 id="samples">Samples</h2>
                 <div class="code-tab-panel" data-panel="async-jh" data-sample="async" data-sample-file="async.jh">
                     <p>This sample runs two prompt workflows in parallel: one with Cursor and one with Claude.</p>
                     <p>Each workflow sets its own <code>agent.backend</code>, captures the prompt response, and logs it.
-                        The default workflow uses <code>run async</code> to fan out both workflows concurrently, with an
-                        implicit join before completion.</p>
+                        The default workflow uses <code>run async</code> to fan out both workflows concurrently.
+                        Each <code>run async</code> returns a <code>Handle&lt;T&gt;</code> that resolves on first read;
+                        unresolved handles are implicitly joined before the workflow completes.</p>
                     <pre><code class="language-jaiph" data-sample-source>#!/usr/bin/env jaiph
 
 const prompt_text = "Say: Greetings! I am [model name]."
@@ -446,9 +447,11 @@ <h3>Language</h3>
                     tags providing the runtime: <code>```node</code>, <code>```python3</code>, <code>```ruby</code>,
                     <code>```pwsh</code> etc.
                 </p>
-                <p><strong>Async calls.</strong> For async managed work, use <code>run async wf()</code> — Jaiph fans
-                    out the workflows concurrently and <strong>implicitly joins</strong> them before the parent workflow
-                    completes.
+                <p><strong>Async calls.</strong> <code>run async wf()</code> returns a <code>Handle&lt;T&gt;</code>
+                    that resolves on first read. Capture with <code>const h = run async wf()</code> and read
+                    the handle when you need the value. Unresolved handles are <strong>implicitly joined</strong>
+                    before the parent workflow completes. Supports <code>recover</code> and <code>catch</code>
+                    composition for async error handling.
                 </p>
                 <p><strong>Agent inbox pattern (channels).</strong> Use inbox channels as a way to pass messages between
                     workflows. Declare channels at top level with <code>channel &lt;name&gt; [-&gt; workflow]</code>
@@ -584,11 +587,17 @@ <h3>Jaiph workflows</h3>
                         <a href="grammar#step-output-contract">Step output contract</a>.
                     </dd>
 
-                    <dt><code>run async ref(args)</code></dt>
-                    <dd>Run a workflow or script concurrently. All async steps are implicitly joined
-                        before the workflow completes; failures are aggregated. Workflows only &mdash;
-                        capture (<code>const x = run async ...</code>) is not supported.
-                        See <a href="grammar">Grammar</a>.
+                    <dt><code>run async ref(args)</code> &middot;
+                        <code>const h = run async ref(args)</code>
+                    </dt>
+                    <dd>Run a workflow or script concurrently. Returns a <code>Handle&lt;T&gt;</code>
+                        that resolves on first non-passthrough read (interpolation, passing as arg to
+                        <code>run</code>, comparison, conditional). Passthrough (capture, re-assignment)
+                        does not force resolution. Unresolved handles are implicitly joined at workflow exit.
+                        Supports <code>recover</code> (retry loop) and <code>catch</code> (single-shot) composition:
+                        <code>run async foo() recover(err) { &hellip; }</code>.
+                        Workflows only. See <a href="grammar">Grammar</a> and
+                        <a href="spec-async-handles">Spec: Async Handles</a>.
                     </dd>
 
                     <dt><code>fail "reason"</code> &middot; <code>fail """..."""</code></dt>
diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md
index 9d6964e8..e3e07c8c 100644
--- a/docs/jaiph-skill.md
+++ b/docs/jaiph-skill.md
@@ -103,7 +103,7 @@ Prefer composable modules over one large file.
   - **Bindings and capture** — `const name = …` (the `const` keyword is required for all captures). For **`ensure`** / **`run` to a workflow or rule**, capture is the callee’s explicit **`return "…"`**. For **`run` to a script**, capture follows **stdout** from the script body. **`prompt`** capture is the agent answer. **`const`** RHS cannot use `$(...)` or disallowed `${...}` forms — use a **`script`** and `const x = run helper(…)`. **`const`** must not use a **bare** `ref(args…)` call shape: use **`const x = run ref(args…)`** (or **`ensure`** for rules), not **`const x = ref(args…)`** — the compiler fails with **`E_PARSE`** and suggests the **`run`** form. Do not put Jaiph symbols inside `$(...)` — use `ensure` / `run`. See [Grammar](grammar.md#step-output-contract).
   - **return** — `return "value"` / `return "${var}"` / `return """..."""` sets the managed return value. Also supports **direct managed calls**: `return run ref()` or `return run ref(args)` and `return ensure ref()` or `return ensure ref(args)` — these execute the target and use its result as the return value, equivalent to `const x = run ref(args)` then `return "${x}"`. Parentheses are required on all call sites.
   - **fail** — `fail "reason"` or `fail """..."""` aborts with stderr message and non-zero exit (workflows; fails the rule when used inside a rule).
-  - **run async** — `run async ref([args...])` starts a workflow or script concurrently. All pending async steps are implicitly joined before the workflow completes; failures are aggregated. Capture (`const name = run async ...`) is not supported. Workflows only — rejected in rules.
+  - **run async** — `run async ref([args...])` starts a workflow or script concurrently and returns a **`Handle<T>`**. Capture is supported: `const h = run async ref()`. The handle resolves on first non-passthrough read (string interpolation, passing as arg to `run`, comparison, conditional, match subject). Passthrough (initial capture, re-assignment) does not force resolution. Unresolved handles are implicitly joined at workflow exit. `recover` (retry loop) and `catch` (single-shot) composition work with `run async`: `run async foo() recover(err) { … }`. Workflows only — rejected in rules.
   - **match** — `match var { "literal" => …, /regex/ => …, _ => … }` pattern-matches on a string value. The subject is always a bare identifier (no `$` or `${}`). Arms are tested top-to-bottom; the first match wins. Patterns: double-quoted string literal (exact match), `/regex/` (regex match), or `_` (wildcard — exactly one required). Usable as a statement, as an expression (`const x = match var { … }`), or with `return` (`return match var { … }`). Using `$var` or `${var}` as the match subject is a parse error. Allowed in both workflows and rules. See [Grammar](grammar.md#match).
   - **if** — `if var == "value" { … }` or `if var =~ /pattern/ { … }`. Subject is a bare identifier. Operators: `==` (exact string equality), `!=` (inequality), `=~` (regex match), `!~` (regex non-match). Operand is a `"string"` for `==`/`!=` or `/regex/` for `=~`/`!~`. Body is a brace block of valid workflow/rule steps. No `else` branch — use `match` for exhaustive value branching. `if` is a statement (no value production; cannot use with `const` or `return`). Allowed in both workflows and rules.
 - **Prompts:** Three body forms: (1) **single-line string** `prompt "..."` — double-quoted, single line only; (2) **identifier** `prompt myVar` — uses the value of an existing binding; (3) **triple-quoted block** `prompt """ ... """` — for multiline text, opening `"""` on the same line as `prompt`. Triple backticks (`` ``` ``) in prompt context are rejected with guidance — they are reserved for scripts. Multiline double-quoted strings are rejected — use a triple-quoted block instead. All forms support `${identifier}` interpolation (`${varName}`, `${paramName}`). **Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` inside the prompt string or triple-quoted body (e.g. `prompt "Fix: ${ensure get_diagnostics()}"`). Nested inline captures are rejected. Bare `$varName` is not valid in orchestration strings. `$(...)` and `${var:-fallback}` are rejected. Capture: `const name = prompt "..."`, `const x = prompt myVar`, `const y = prompt """ ... """`. Optional **typed prompt:** `const name = prompt "..." returns "{ field: type, ... }"` or `const name = prompt myVar returns "..."` (flat schema; types `string`, `number`, `boolean`) validates the agent's JSON and sets `${name}` plus per-field variables accessible via **dot notation** — `${name.field}`. Dot notation is validated at compile time: the variable must be a typed prompt capture and the field must exist in the schema. **Orchestration bindings are strings:** typed fields are coerced with `String()` after JSON validation, so e.g. a numeric field is still the text `"42"` in scope. See [Grammar](grammar.md).
diff --git a/docs/language.md b/docs/language.md
index e9528b76..94be5e82 100644
--- a/docs/language.md
+++ b/docs/language.md
@@ -306,19 +306,41 @@ const output = run transform()
 
 **Capture:** For a workflow, captures the explicit `return` value. For a script, captures stdout.
 
-### `run async` — Concurrent Execution
+### `run async` — Concurrent Execution with Handles
 
-Starts a workflow or script concurrently. All pending async steps are implicitly joined before the enclosing workflow returns.
+`run async ref(args)` starts a workflow or script concurrently and returns a **`Handle<T>`** — a value that resolves to the called function's return value on first non-passthrough read. `T` is the same type the function would return under a synchronous `run`.
 
 ```jaiph
 workflow default() {
+  # Fire-and-forget style (handle created but not captured)
   run async lib.task_a()
-  run async lib.task_b()
-  # both joined automatically before workflow returns
+
+  # Capture the handle for later use
+  const h = run async lib.task_b()
+
+  # Reading the handle forces resolution (blocks until task_b completes)
+  log "${h}"
 }
 ```
 
-Constraints: workflow-only (rejected in rules), capture not supported.
+**Handle resolution:** The handle resolves on first non-passthrough read — string interpolation, passing as argument to `run`, comparison, conditional branching, or match subject. Passthrough operations (initial capture into `const`, re-assignment) do not force resolution.
+
+**Implicit join:** When a workflow scope exits, the runtime implicitly joins all remaining unresolved handles created in that scope. This is not an error — it preserves backward compatibility with the pre-handle `run async` model.
+
+**`recover` composition:** `recover` works with `run async` to provide retry-loop semantics on the async branch:
+
+```jaiph
+const b1 = run async foo() recover(err) {
+  log "repairing: ${err}"
+  run fix_it()
+}
+```
+
+The async branch retries `foo()` using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). The handle resolves to the eventual success value or the final failure. `catch` also works with `run async` for single-shot recovery (no retry loop).
+
+See [Spec: Async Handles](spec-async-handles) for the full value model.
+
+Constraints: workflow-only (rejected in rules), inline scripts not supported with `run async`.
 
 ### `ensure` — Execute a Rule
 
@@ -397,7 +419,7 @@ workflow default() {
 **Constraints:**
 - `recover` requires exactly one binding: `recover(name)`. Bare `recover` without bindings is a parse error.
 - All call arguments must appear inside parentheses **before** `recover`.
-- `recover` is available on `run` steps in workflows only (not `ensure`, not `run async`).
+- `recover` is available on `run` steps in workflows only (not `ensure`). `recover` also works with `run async` — see [`run async`](#run-async--concurrent-execution-with-handles).
 - `recover` and `catch` are mutually exclusive on the same step — use one or the other.
 
 ### `prompt` — Agent Interaction
@@ -691,7 +713,7 @@ Every step produces three outputs: status, value, and logs.
 | `prompt` | exit code | final assistant answer | artifacts |
 | `log` / `logerr` | always 0 | — | event stream |
 | `fail` | non-zero (abort) | — | stderr |
-| `run async` | aggregated | not supported | artifacts |
+| `run async` | aggregated | `Handle<T>` — resolves to return value on read | artifacts |
 | `const` | same as RHS | binds locally | — |
 
 ## Lexical Notes
diff --git a/docs/spec-async-handles.md b/docs/spec-async-handles.md
new file mode 100644
index 00000000..57eb87d8
--- /dev/null
+++ b/docs/spec-async-handles.md
@@ -0,0 +1,111 @@
+---
+title: "Spec: Async Handles"
+---
+
+# Async Handles — `Handle<T>` Value Model
+
+This document specifies the `Handle<T>` value model for `run async` in Jaiph.
+
+## Overview
+
+`run async ref(args)` returns a **`Handle<T>`** immediately. `T` is the same type the called function would return under a synchronous `run`. The handle resolves to the eventual return value on first non-passthrough read.
+
+## Handle creation
+
+```jaiph
+const h = run async foo()
+run async bar()
+```
+
+- `const h = run async foo()` — `h` receives a handle. The async execution starts immediately; the workflow continues without waiting.
+- `run async bar()` — a handle is created internally but not captured. The workflow proceeds without waiting.
+
+There is **no fire-and-forget mode**. Every `run async` creates a handle tracked by the runtime, whether captured or not.
+
+## Resolution semantics
+
+A handle resolves to the value of the called function (its `return` value or trimmed stdout). Resolution is triggered by the **first non-passthrough read**.
+
+### Reads that force resolution
+
+| Access pattern | Example | Forces resolution? |
+|---|---|---|
+| String interpolation | `log "${h}"` | Yes |
+| Passing as argument to `run` | `run other(h)` | Yes |
+| Comparison / conditional | `if h == "ok" { ... }` | Yes |
+| Match subject | `match h { ... }` | Yes |
+| Any other value access | `channel <- $h` | Yes |
+
+### Passthrough (does NOT force resolution)
+
+| Access pattern | Example | Forces resolution? |
+|---|---|---|
+| Initial capture | `const h = run async foo()` | No |
+| Re-assignment | (internal scope passing) | No |
+
+Once resolved, the handle is replaced in-place by the resolved string value. Subsequent reads return the cached value without re-executing.
+
+## Workflow exit — implicit join
+
+When a workflow scope exits (the last step completes), the runtime **implicitly joins all remaining unresolved handles** created in that scope. This is not an error condition.
+
+- If all handles resolve successfully, the workflow returns normally.
+- If any handle resolved (or resolves during join) with a non-zero status, the workflow fails with an aggregated error message listing all failed async refs.
+
+This preserves backward compatibility with the pre-handle `run async` behavior where all async steps were awaited at workflow exit.
+
+## `recover` composition
+
+`recover` works with `run async` to provide retry-loop semantics on the async branch:
+
+```jaiph
+const b1 = run async foo() recover(err) {
+  log "repairing: ${err}"
+  run fix_it()
+}
+```
+
+### Semantics
+
+1. The async branch executes `foo()`.
+2. If `foo()` succeeds, the handle resolves to its return value.
+3. If `foo()` fails, the recover body runs with `err` bound to the merged stdout+stderr of the failure.
+4. If the recover body completes successfully (status 0, no early return), `foo()` is retried.
+5. Steps 3–4 repeat until `foo()` succeeds or the retry limit is reached.
+6. If the retry limit is exhausted, the handle resolves to the final failure result.
+
+### Retry limit
+
+The retry limit is shared with non-async `recover`:
+
+- Default: **10** attempts.
+- Configurable per module/workflow via `config { run.recover_limit = N }`.
+
+### `catch` composition
+
+`catch` also works with `run async` for single-shot recovery (no retry loop):
+
+```jaiph
+run async foo() catch(err) {
+  log "caught: ${err}"
+}
+```
+
+If `foo()` fails, the catch body runs once. No retry.
+
+## Interaction with progress/events
+
+Async handles preserve the existing async progress/event visibility model:
+
+- Each async branch gets a unique branch index (subscript numbering: ₁, ₂, …).
+- Step events (`STEP_START`, `STEP_END`) and log events carry `async_indices` for the branch.
+- The CLI progress tree renders async branches at the appropriate indent level.
+
+Handle resolution does not emit additional events beyond what the async branch already emits.
+
+## Constraints
+
+- `run async` is only allowed in workflows, not in rules.
+- `run async` is not supported with inline scripts (`` run async `body`(args) ``).
+- There is no explicit `await` keyword. Resolution is implicit on first read or at workflow exit.
+- There is no fire-and-forget. All handles are joined.
diff --git a/e2e/tests/104_run_async.sh b/e2e/tests/104_run_async.sh
index cd8b9e8f..785b3994 100755
--- a/e2e/tests/104_run_async.sh
+++ b/e2e/tests/104_run_async.sh
@@ -139,30 +139,30 @@ EXPECTED
 e2e::expect_out "async_interleave.jh" "default" "slow-done"
 e2e::expect_out "async_interleave.jh" "slow" "slow-done"
 
-# --- capture + run async is rejected at parse time ---
+# --- capture + run async returns handle that resolves on read ---
 
-e2e::section "capture + run async parse error"
+e2e::section "capture + run async returns handle"
 
 e2e::file "capture_async.jh" <<'EOF'
 workflow helper() {
-  log "hi"
+  return "hello"
 }
 
 workflow default() {
   const x = run async helper()
+  log x
 }
 EOF
 
-set +e
 capture_output="$(e2e::run "capture_async.jh" 2>&1)"
 capture_status=$?
-set -e
 
-if [[ "$capture_status" -eq 0 ]]; then
-  e2e::fail "expected parse error for capture + run async"
+if [[ "$capture_status" -ne 0 ]]; then
+  e2e::fail "expected capture + run async to succeed, got exit $capture_status"
 fi
-# Error line includes absolute source path which varies per invocation
-e2e::assert_contains "$capture_output" "const ... = run must target a valid reference" "capture + run async diagnostic"
+e2e::pass "capture + run async succeeds"
+
+e2e::expect_out "capture_async.jh" "default" "hello"
 
 # --- run async sibling depth in progress tree ---
 
diff --git a/src/format/emit.test.ts b/src/format/emit.test.ts
index 05c5ae70..773f2478 100644
--- a/src/format/emit.test.ts
+++ b/src/format/emit.test.ts
@@ -501,4 +501,39 @@ describe("emitModule", () => {
     ].join("\n");
     assert.equal(roundTrip(source), source);
   });
+
+  it("round-trips const capture with run async", () => {
+    const source = [
+      "workflow default() {",
+      "  const h = run async foo()",
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
+
+  it("round-trips run async with recover block", () => {
+    const source = [
+      "workflow default() {",
+      "  run async foo() recover (err) {",
+      '    log "repair"',
+      "  }",
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
+
+  it("round-trips run async with multi-line recover block", () => {
+    const source = [
+      "workflow default() {",
+      "  run async foo() recover (err) {",
+      '    log "repairing"',
+      "    run fix_it()",
+      "  }",
+      "}",
+      "",
+    ].join("\n");
+    assert.equal(roundTrip(source), source);
+  });
 });
diff --git a/src/format/emit.ts b/src/format/emit.ts
index a2649dcd..0275f686 100644
--- a/src/format/emit.ts
+++ b/src/format/emit.ts
@@ -766,8 +766,10 @@ function emitConstStep(name: string, value: ConstRhs): string {
         return `const ${name} = """`;
       }
       return `const ${name} = ${value.bashRhs}`;
-    case "run_capture":
-      return `const ${name} = run ${emitRef(value.ref, value.args, value.bareIdentifierArgs)}`;
+    case "run_capture": {
+      const asyncMod = value.async ? "async " : "";
+      return `const ${name} = run ${asyncMod}${emitRef(value.ref, value.args, value.bareIdentifierArgs)}`;
+    }
     case "ensure_capture":
       return `const ${name} = ensure ${emitRef(value.ref, value.args, value.bareIdentifierArgs)}`;
     case "prompt_capture": {
diff --git a/src/parse/const-rhs.ts b/src/parse/const-rhs.ts
index 31b80fb8..252a088a 100644
--- a/src/parse/const-rhs.ts
+++ b/src/parse/const-rhs.ts
@@ -92,6 +92,27 @@ export function parseConstRhs(
   }
   if (head.startsWith("run ")) {
     const rest = head.slice("run ".length).trim();
+    // const x = run async ref() — async capture returning a handle
+    if (rest.startsWith("async ")) {
+      const asyncRest = rest.slice("async ".length).trim();
+      if (asyncRest.startsWith("`")) {
+        fail(filePath, "run async is not supported with inline scripts", lineNo, col);
+      }
+      const call = parseCallRef(asyncRest);
+      if (!call) {
+        fail(filePath, "const ... = run async must target a valid reference", lineNo, col);
+      }
+      rejectTrailingContent(filePath, lineNo, "run async", call.rest);
+      const ref: WorkflowRefDef = { value: call.ref, loc: { line: lineNo, col } };
+      return {
+        value: {
+          kind: "run_capture", ref, args: call.args,
+          ...(call.bareIdentifierArgs ? { bareIdentifierArgs: call.bareIdentifierArgs } : {}),
+          async: true,
+        },
+        nextLineIdx: lineIdx,
+      };
+    }
     if (rest.startsWith("`")) {
       const result = parseAnonymousInlineScript(filePath, lines, lineIdx, rest, lineNo, col);
       return {
diff --git a/src/parse/parse-run-async.test.ts b/src/parse/parse-run-async.test.ts
index 9bd178fb..cd9d0d77 100644
--- a/src/parse/parse-run-async.test.ts
+++ b/src/parse/parse-run-async.test.ts
@@ -62,7 +62,7 @@ test("parse: regular run does not have async flag", () => {
   }
 });
 
-test("parse: capture + run async is rejected", () => {
+test("parse: capture + run async is rejected without const", () => {
   const src = [
     "workflow default() {",
     "  x = run async some_wf()",
@@ -73,3 +73,102 @@ test("parse: capture + run async is rejected", () => {
     /assignment without "const" is no longer supported/,
   );
 });
+
+test("parse: const capture + run async produces run_capture with async flag", () => {
+  const src = [
+    "workflow default() {",
+    "  const h = run async some_wf()",
+    "}",
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  const step = mod.workflows[0]!.steps[0]!;
+  assert.equal(step.type, "const");
+  if (step.type === "const") {
+    assert.equal(step.name, "h");
+    assert.equal(step.value.kind, "run_capture");
+    if (step.value.kind === "run_capture") {
+      assert.equal(step.value.ref.value, "some_wf");
+      assert.equal(step.value.async, true);
+    }
+  }
+});
+
+test("parse: const capture + run async with args", () => {
+  const src = [
+    "workflow default() {",
+    '  const h = run async other_wf("hello")',
+    "}",
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  const step = mod.workflows[0]!.steps[0]!;
+  assert.equal(step.type, "const");
+  if (step.type === "const") {
+    assert.equal(step.value.kind, "run_capture");
+    if (step.value.kind === "run_capture") {
+      assert.equal(step.value.ref.value, "other_wf");
+      assert.equal(step.value.args, '"hello"');
+      assert.equal(step.value.async, true);
+    }
+  }
+});
+
+test("parse: run async with recover block", () => {
+  const src = [
+    "workflow default() {",
+    '  run async foo() recover(err) { log "repair" }',
+    "}",
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  const step = mod.workflows[0]!.steps[0]!;
+  assert.equal(step.type, "run");
+  if (step.type === "run") {
+    assert.equal(step.workflow.value, "foo");
+    assert.equal(step.async, true);
+    assert.ok(step.recoverLoop);
+    if (step.recoverLoop && "block" in step.recoverLoop) {
+      assert.equal(step.recoverLoop.bindings.failure, "err");
+      assert.equal(step.recoverLoop.block.length, 1);
+      assert.equal(step.recoverLoop.block[0].type, "log");
+    }
+  }
+});
+
+test("parse: run async with multi-line recover block", () => {
+  const src = [
+    "workflow default() {",
+    "  run async foo() recover(err) {",
+    '    log "repairing"',
+    "    run fix_it()",
+    "  }",
+    "}",
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  const step = mod.workflows[0]!.steps[0]!;
+  assert.equal(step.type, "run");
+  if (step.type === "run") {
+    assert.equal(step.async, true);
+    assert.ok(step.recoverLoop);
+    if (step.recoverLoop && "block" in step.recoverLoop) {
+      assert.equal(step.recoverLoop.block.length, 2);
+    }
+  }
+});
+
+test("parse: run async with catch block", () => {
+  const src = [
+    "workflow default() {",
+    '  run async bar() catch (e) { log "caught" }',
+    "}",
+  ].join("\n");
+  const mod = parsejaiph(src, "test.jh");
+  const step = mod.workflows[0]!.steps[0]!;
+  assert.equal(step.type, "run");
+  if (step.type === "run") {
+    assert.equal(step.workflow.value, "bar");
+    assert.equal(step.async, true);
+    assert.ok(step.recover);
+    if (step.recover && "block" in step.recover) {
+      assert.equal(step.recover.bindings.failure, "e");
+    }
+  }
+});
diff --git a/src/parse/workflows.ts b/src/parse/workflows.ts
index d6539056..b4a2d1c0 100644
--- a/src/parse/workflows.ts
+++ b/src/parse/workflows.ts
@@ -358,6 +358,22 @@ export function parseWorkflowBlock(
       if (runBody.startsWith("`")) {
         fail(filePath, "run async is not supported with inline scripts", innerNo, innerRaw.indexOf("run") + 1);
       }
+      // Check for run async ... recover (loop semantics)
+      const recoverResult = parseRunRecoverStep(filePath, lines, idx, innerNo, innerRaw, runBody);
+      if (recoverResult) {
+        if (recoverResult.step.type === "run") recoverResult.step.async = true;
+        workflow.steps.push(recoverResult.step);
+        idx = recoverResult.nextIdx;
+        continue;
+      }
+      // Check for run async ... catch
+      const catchResult = parseRunCatchStep(filePath, lines, idx, innerNo, innerRaw, runBody);
+      if (catchResult) {
+        if (catchResult.step.type === "run") catchResult.step.async = true;
+        workflow.steps.push(catchResult.step);
+        idx = catchResult.nextIdx;
+        continue;
+      }
       const call = parseCallRef(runBody);
       if (!call) {
         fail(filePath, "run async must target a valid reference: run async ref() or run async ref(args) — parentheses are required", innerNo);
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index 07163273..76eccbec 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -23,6 +23,14 @@ const MAX_EMBED = 1024 * 1024;
 const MAX_RECURSION_DEPTH = 256;
 type EnsureRecover = Extract<WorkflowStepDef, { type: "ensure" }>["recover"];
 
+const HANDLE_PREFIX = "__JAIPH_HANDLE__";
+
+type AsyncHandle = {
+  ref: string;
+  promise: Promise<StepResult>;
+  resolved?: StepResult;
+};
+
 /** Mock body definition: shell for script mocks, Jaiph steps for workflow/rule mocks. */
 export type MockBodyDef =
   | { kind: "shell"; body: string; params: string[] }
@@ -330,6 +338,8 @@ export class NodeWorkflowRuntime {
   private promptSeq = 0;
   private workflowCtxStack: WorkflowContext[] = [];
   private readonly mockBodies: Map<string, MockBodyDef>;
+  private handleRegistry = new Map<string, AsyncHandle>();
+  private handleIdCounter = 0;
 
   private getFrameStack(): Frame[] {
     return this.asyncFrameStack.getStore() ?? this.stack;
@@ -339,6 +349,55 @@ export class NodeWorkflowRuntime {
     return this.asyncIndicesStorage.getStore() ?? [];
   }
 
+  private createHandle(ref: string, promise: Promise<StepResult>): string {
+    this.handleIdCounter += 1;
+    const handleId = `${HANDLE_PREFIX}${this.handleIdCounter}`;
+    this.handleRegistry.set(handleId, { ref, promise });
+    return handleId;
+  }
+
+  private isHandle(value: string): boolean {
+    return value.startsWith(HANDLE_PREFIX);
+  }
+
+  /** Resolve a handle to its StepResult. Caches the result for subsequent reads. */
+  private async resolveHandleResult(handleId: string): Promise<StepResult> {
+    const handle = this.handleRegistry.get(handleId);
+    if (!handle) return { status: 1, output: "", error: "invalid handle" };
+    if (handle.resolved) return handle.resolved;
+    const result = await handle.promise;
+    handle.resolved = result;
+    return result;
+  }
+
+  /** Resolve a handle value to the string it represents. Updates scope var in place. */
+  private async resolveHandleVar(scope: Scope, varName: string): Promise<StepResult> {
+    const val = scope.vars.get(varName);
+    if (!val || !this.isHandle(val)) return { status: 0, output: "", error: "" };
+    const result = await this.resolveHandleResult(val);
+    if (result.status === 0) {
+      scope.vars.set(varName, result.returnValue ?? result.output.trim());
+    } else {
+      scope.vars.set(varName, "");
+    }
+    return result;
+  }
+
+  /** Scan input for ${var} references and resolve any that are handles. */
+  private async resolveHandlesInInput(scope: Scope, input: string): Promise<StepResult | null> {
+    const re = /\$\{([a-zA-Z_][a-zA-Z0-9_]*)/g;
+    let m: RegExpExecArray | null;
+    while ((m = re.exec(input)) !== null) {
+      const varName = m[1];
+      const val = scope.vars.get(varName);
+      if (val && this.isHandle(val)) {
+        const r = await this.resolveHandleVar(scope, varName);
+        if (r.status !== 0) return r;
+      }
+    }
+    return null;
+  }
+
   constructor(graph: RuntimeGraph, opts: { env?: NodeJS.ProcessEnv; cwd?: string; mockBodies?: Map<string, MockBodyDef> }) {
     this.graph = graph;
     this.env = opts.env ?? process.env;
@@ -728,6 +787,9 @@ export class NodeWorkflowRuntime {
     input: string,
     scope: Scope,
   ): Promise<{ ok: true; value: string } | { ok: false; result: StepResult }> {
+    // Resolve any handle-valued vars referenced in the input before interpolating.
+    const handleErr = await this.resolveHandlesInInput(scope, input);
+    if (handleErr) return { ok: false, result: handleErr };
     const re = new RegExp(NodeWorkflowRuntime.INLINE_CAPTURE_RE.source, "g");
     if (!re.test(input)) {
       return { ok: true, value: interpolate(input, scope.vars, scope.env) };
@@ -754,6 +816,12 @@ export class NodeWorkflowRuntime {
     scope: Scope,
     expr: MatchExprDef,
   ): Promise<{ ok: true; value: string } | { ok: false; result: StepResult }> {
+    // Resolve handle if the subject variable is a handle.
+    const rawSubject = scope.vars.get(expr.subject);
+    if (rawSubject && this.isHandle(rawSubject)) {
+      const hr = await this.resolveHandleVar(scope, expr.subject);
+      if (hr.status !== 0) return { ok: false, result: hr };
+    }
     // Subject is a bare identifier — resolve against scope variables
     const subject = scope.vars.get(expr.subject) ?? scope.env?.[expr.subject] ?? "";
     for (const arm of expr.arms) {
@@ -809,7 +877,8 @@ export class NodeWorkflowRuntime {
     let accOut = "";
     let accErr = "";
     let returnValue: string | undefined;
-    const pendingAsync: Array<{ ref: string; promise: Promise<StepResult> }> = [];
+    /** Handle IDs created by `run async` in this scope (for implicit join at exit). */
+    const localHandleIds: string[] = [];
     let asyncCounter = 0;
     for (const step of steps) {
       if (step.type === "comment" || step.type === "blank_line") continue;
@@ -888,6 +957,8 @@ export class NodeWorkflowRuntime {
           if (!sendIr.ok) return this.mergeStepResult(accOut, accErr, sendIr.result);
           payload = sendIr.value;
         } else if (step.rhs.kind === "var") {
+          const sendHandleErr = await this.resolveHandlesInInput(scope, step.rhs.bash);
+          if (sendHandleErr) return this.mergeStepResult(accOut, accErr, sendHandleErr);
           payload = interpolate(step.rhs.bash, scope.vars, scope.env);
         } else if (step.rhs.kind === "run") {
           const runValue = await this.executeRunRef(scope, step.rhs.ref.value, step.rhs.args ?? "");
@@ -1031,7 +1102,24 @@ export class NodeWorkflowRuntime {
           continue;
         }
         if (step.value.kind === "run_capture") {
-          const runResult = await this.executeRunRef(scope, step.value.ref.value, step.value.args ?? "");
+          const captureRef = step.value.ref.value;
+          const captureArgs = step.value.args ?? "";
+          if (step.value.async) {
+            // Async capture: create handle, store in scope, register for join.
+            asyncCounter += 1;
+            const branchStack = [...this.getFrameStack()];
+            const branchIndices = [...this.getAsyncIndices(), asyncCounter];
+            const promise = this.asyncFrameStack.run(branchStack, () =>
+              this.asyncIndicesStorage.run(branchIndices, () =>
+                this.executeRunRef(scope, captureRef, captureArgs),
+              ),
+            );
+            const handleId = this.createHandle(captureRef, promise);
+            localHandleIds.push(handleId);
+            scope.vars.set(step.name, handleId);
+            continue;
+          }
+          const runResult = await this.executeRunRef(scope, captureRef, captureArgs);
           if (runResult.status !== 0) return this.mergeStepResult(accOut, accErr, runResult);
           scope.vars.set(step.name, runResult.returnValue ?? runResult.output.trim());
           continue;
@@ -1145,12 +1233,54 @@ export class NodeWorkflowRuntime {
           asyncCounter += 1;
           const branchStack = [...this.getFrameStack()];
           const branchIndices = [...this.getAsyncIndices(), asyncCounter];
-          const promise = this.asyncFrameStack.run(branchStack, () =>
-            this.asyncIndicesStorage.run(branchIndices, () =>
-              this.executeRunRef(scope, step.workflow.value, step.args ?? ""),
-            ),
-          );
-          pendingAsync.push({ ref: step.workflow.value, promise });
+          let promise: Promise<StepResult>;
+          if (step.recoverLoop) {
+            // Async + recover loop: wrap retry logic in a single promise.
+            const recoverLimit = this.resolveRecoverLimit(scope.filePath);
+            const loopSteps = "single" in step.recoverLoop ? [step.recoverLoop.single] : step.recoverLoop.block;
+            const recoverBindings = step.recoverLoop.bindings;
+            promise = this.asyncFrameStack.run(branchStack, () =>
+              this.asyncIndicesStorage.run(branchIndices, async () => {
+                let lastResult = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
+                let attempt = 1;
+                while (lastResult.status !== 0 && attempt <= recoverLimit) {
+                  const loopVars = new Map(scope.vars);
+                  loopVars.set(recoverBindings.failure, `${lastResult.output}${lastResult.error}`);
+                  const rr = await this.executeSteps({ ...scope, vars: loopVars }, loopSteps);
+                  if (rr.status !== 0 || rr.returnValue !== undefined) return rr;
+                  lastResult = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
+                  attempt += 1;
+                }
+                return lastResult;
+              }),
+            );
+          } else if (step.recover) {
+            // Async + catch: single-shot recovery in the async branch.
+            const recoverSteps = "single" in step.recover ? [step.recover.single] : step.recover.block;
+            const recoverBindings = step.recover.bindings;
+            promise = this.asyncFrameStack.run(branchStack, () =>
+              this.asyncIndicesStorage.run(branchIndices, async () => {
+                const result = await this.executeRunRef(scope, step.workflow.value, step.args ?? "");
+                if (result.status === 0) return result;
+                const recoverVars = new Map(scope.vars);
+                recoverVars.set(recoverBindings.failure, `${result.output}${result.error}`);
+                const rr = await this.executeSteps({ ...scope, vars: recoverVars }, recoverSteps);
+                if (rr.status !== 0 || rr.returnValue !== undefined) return rr;
+                return { status: 0, output: result.output, error: result.error };
+              }),
+            );
+          } else {
+            promise = this.asyncFrameStack.run(branchStack, () =>
+              this.asyncIndicesStorage.run(branchIndices, () =>
+                this.executeRunRef(scope, step.workflow.value, step.args ?? ""),
+              ),
+            );
+          }
+          const handleId = this.createHandle(step.workflow.value, promise);
+          localHandleIds.push(handleId);
+          if (step.captureName) {
+            scope.vars.set(step.captureName, handleId);
+          }
           continue;
         }
         if (step.recoverLoop) {
@@ -1211,6 +1341,12 @@ export class NodeWorkflowRuntime {
         continue;
       }
       if (step.type === "if") {
+        // Resolve handle if the subject variable is a handle.
+        const rawSubject = scope.vars.get(step.subject);
+        if (rawSubject && this.isHandle(rawSubject)) {
+          const hr = await this.resolveHandleVar(scope, step.subject);
+          if (hr.status !== 0) return this.mergeStepResult(accOut, accErr, hr);
+        }
         const subjectVal = scope.vars.get(step.subject) ?? scope.env?.[step.subject] ?? "";
         let condMet = false;
         if (step.operator === "==" && step.operand.kind === "string_literal") {
@@ -1239,20 +1375,34 @@ export class NodeWorkflowRuntime {
         continue;
       }
     }
-    // Implicit join: await all pending async steps before returning.
-    if (pendingAsync.length > 0) {
-      const settled = await Promise.allSettled(pendingAsync.map((p) => p.promise));
+    // Implicit join: await all unresolved handles created in this scope before returning.
+    if (localHandleIds.length > 0) {
       const failures: string[] = [];
-      for (let i = 0; i < settled.length; i += 1) {
-        const r = settled[i]!;
-        if (r.status === "rejected") {
-          failures.push(`run async ${pendingAsync[i]!.ref}: ${String(r.reason)}`);
-        } else if (r.value.status !== 0) {
-          failures.push(`run async ${pendingAsync[i]!.ref}: ${r.value.error}`);
-          accOut += r.value.output;
-          accErr += r.value.error;
-        } else {
-          accOut += r.value.output;
+      for (const handleId of localHandleIds) {
+        const handle = this.handleRegistry.get(handleId);
+        if (!handle) continue;
+        if (handle.resolved) {
+          // Already resolved (via a read earlier) — just check status.
+          if (handle.resolved.status !== 0) {
+            failures.push(`run async ${handle.ref}: ${handle.resolved.error}`);
+            accOut += handle.resolved.output;
+            accErr += handle.resolved.error;
+          } else {
+            accOut += handle.resolved.output;
+          }
+          continue;
+        }
+        try {
+          const result = await this.resolveHandleResult(handleId);
+          if (result.status !== 0) {
+            failures.push(`run async ${handle.ref}: ${result.error}`);
+            accOut += result.output;
+            accErr += result.error;
+          } else {
+            accOut += result.output;
+          }
+        } catch (err) {
+          failures.push(`run async ${handle.ref}: ${String(err)}`);
         }
       }
       if (failures.length > 0) {
@@ -1373,12 +1523,19 @@ export class NodeWorkflowRuntime {
     return `${filePath}::${name}`;
   }
 
-  /** Synchronous fast-path: resolve args when every token is a plain literal. */
+  /** Synchronous fast-path: resolve args when every token is a plain literal and no handles. */
   private resolveArgsRawSync(scope: Scope, raw: string | string[]): string[] | null {
     if (Array.isArray(raw)) return raw;
     const tokens = parseArgTokens(raw);
     for (const token of tokens) {
       if (token.kind !== "literal") return null;
+      // Bail to async path if any referenced var is a handle.
+      const varRe = /\$\{([a-zA-Z_][a-zA-Z0-9_]*)/g;
+      let vm: RegExpExecArray | null;
+      while ((vm = varRe.exec(token.value)) !== null) {
+        const val = scope.vars.get(vm[1]);
+        if (val && this.isHandle(val)) return null;
+      }
     }
     return tokens.map((t) => interpolate((t as { kind: "literal"; value: string }).value, scope.vars, scope.env));
   }
@@ -1391,6 +1548,9 @@ export class NodeWorkflowRuntime {
     const resolved: string[] = [];
     for (const token of tokens) {
       if (token.kind === "literal") {
+        // Resolve handles before interpolating.
+        const handleErr = await this.resolveHandlesInInput(scope, token.value);
+        if (handleErr) return handleErr;
         resolved.push(interpolate(token.value, scope.vars, scope.env));
         continue;
       }
diff --git a/src/types.ts b/src/types.ts
index 0abcef2e..95190710 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -53,7 +53,7 @@ export interface MatchExprDef {
 
 export type ConstRhs =
   | { kind: "expr"; bashRhs: string; /** `const x = """..."""` — runtime dedents margin. */ tripleQuoted?: boolean }
-  | { kind: "run_capture"; ref: WorkflowRefDef; args?: string; bareIdentifierArgs?: string[] }
+  | { kind: "run_capture"; ref: WorkflowRefDef; args?: string; bareIdentifierArgs?: string[]; async?: boolean }
   | { kind: "ensure_capture"; ref: RuleRefDef; args?: string; bareIdentifierArgs?: string[] }
   | {
       kind: "prompt_capture";
diff --git a/test/sample-build.test.ts b/test/sample-build.test.ts
index 4d2f141e..2362ad43 100644
--- a/test/sample-build.test.ts
+++ b/test/sample-build.test.ts
@@ -2579,3 +2579,242 @@ test("recover: retry limit configurable via config", () => {
   }
 });
 
+// ── Handle<T> async model tests ──
+
+test("handle: const capture run async creates handle that resolves on read", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-capture-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        'script echo_val = `echo "hello"`',
+        "workflow greet() {",
+        "  run echo_val()",
+        '  return "hello"',
+        "}",
+        "workflow default() {",
+        "  const h = run async greet()",
+        '  log "${h}"',
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: passing handle as arg to run forces resolution", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-resolve-arg-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "workflow producer() {",
+        '  return "produced"',
+        "}",
+        "workflow consumer(val) {",
+        '  log "${val}"',
+        "}",
+        "workflow default() {",
+        "  const h = run async producer()",
+        "  run consumer(h)",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: multi-handle join — multiple async handles passed into another call", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-multi-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "workflow make_a() {",
+        '  return "A"',
+        "}",
+        "workflow make_b() {",
+        '  return "B"',
+        "}",
+        "workflow combine(a, b) {",
+        '  log "${a}-${b}"',
+        "}",
+        "workflow default() {",
+        "  const ha = run async make_a()",
+        "  const hb = run async make_b()",
+        "  run combine(ha, hb)",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: workflow exit joins unresolved handles without error", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-join-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        'script noop = `echo "done"`',
+        "workflow bg() {",
+        "  run noop()",
+        "}",
+        "workflow default() {",
+        "  const h = run async bg()",
+        '  log "continuing"',
+        "  # h is never read — implicit join at exit",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: handles stored in separate vars and resolved when read", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-stored-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "workflow first() {",
+        '  return "1"',
+        "}",
+        "workflow second() {",
+        '  return "2"',
+        "}",
+        "workflow default() {",
+        "  const h1 = run async first()",
+        "  const h2 = run async second()",
+        "  # Both stored, not resolved yet",
+        '  log "${h1}"',
+        '  log "${h2}"',
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: run async foo() recover — handle resolves to success after repair", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-recover-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "script check = `test -f .marker`",
+        "workflow check_wf() {",
+        "  run check()",
+        "}",
+        "script fix_impl = `touch .marker`",
+        "workflow fix() {",
+        "  run fix_impl()",
+        "}",
+        "workflow default() {",
+        "  run async check_wf() recover(err) {",
+        "    run fix()",
+        "  }",
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.equal(r.status, 0, r.stderr);
+    assert.match(r.stdout, /PASS/);
+    assert.ok(existsSync(join(root, ".marker")), "repair body should have created marker");
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("handle: run async recover shares retry-limit semantics with non-async recover", () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-handle-recover-limit-"));
+  try {
+    writeFileSync(
+      join(root, "main.jh"),
+      [
+        "config {",
+        "  run.recover_limit = 2",
+        "}",
+        "",
+        "script always_fail = `exit 1`",
+        "workflow failing() {",
+        "  run always_fail()",
+        "}",
+        "workflow default() {",
+        '  run async failing() recover(err) {',
+        '    log "repair attempt"',
+        '  }',
+        "}",
+        "",
+      ].join("\n"),
+    );
+    const cliPath = join(process.cwd(), "dist/src/cli.js");
+    const r = spawnSync("node", [cliPath, "run", join(root, "main.jh")], {
+      encoding: "utf8",
+      cwd: root,
+      env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" },
+    });
+    assert.notEqual(r.status, 0, "should fail after retry limit exhausted");
+    const combined = r.stdout + r.stderr;
+    assert.match(combined, /FAIL/);
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+

From e34a2f97cb5d67b459b32bfd13ef5a742440cb90 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 15:09:37 +0200
Subject: [PATCH 14/38] Feat: Add artifacts.jh library and runtime mount for
 publishing files out of the sandbox

Introduce a two-layer artifacts system for workflows running inside the
Docker sandbox (or on the host). The runtime layer creates a
.jaiph/runs/<run_id>/artifacts/ directory before workflow execution and
exposes its path via the JAIPH_ARTIFACTS_DIR env var (resolving to
/jaiph/run/artifacts in the container, the host path otherwise). The
library layer ships .jaiph/libs/jaiphlang/artifacts.jh paired with
artifacts.sh, providing three export workflow entries: save (copy a file
into artifacts), save_patch (git diff excluding .jaiph/), and
apply_patch (git apply). The library mirrors the existing queue.jh
pattern. Includes runtime unit tests, an E2E test, and docs updates.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .jaiph/libs/jaiphlang/artifacts.jh            |  36 +++++
 .jaiph/libs/jaiphlang/artifacts.sh            |  72 +++++++++
 CHANGELOG.md                                  |   1 +
 QUEUE.md                                      |  69 --------
 docs/artifacts.md                             |   2 +
 docs/configuration.md                         |   2 +
 docs/index.html                               |   5 +
 docs/libraries.md                             |  43 +++++
 e2e/tests/129_artifacts_lib.sh                | 152 ++++++++++++++++++
 src/cli/run/env.ts                            |   1 +
 .../node-workflow-runtime.artifacts.test.ts   |  58 +++++++
 src/runtime/kernel/node-workflow-runtime.ts   |   3 +
 12 files changed, 375 insertions(+), 69 deletions(-)
 create mode 100644 .jaiph/libs/jaiphlang/artifacts.jh
 create mode 100755 .jaiph/libs/jaiphlang/artifacts.sh
 create mode 100755 e2e/tests/129_artifacts_lib.sh

diff --git a/.jaiph/libs/jaiphlang/artifacts.jh b/.jaiph/libs/jaiphlang/artifacts.jh
new file mode 100644
index 00000000..5181e212
--- /dev/null
+++ b/.jaiph/libs/jaiphlang/artifacts.jh
@@ -0,0 +1,36 @@
+#!/usr/bin/env jaiph
+
+#
+# Artifact publishing for Jaiph workflows.
+# Copies files from the workspace into ${JAIPH_ARTIFACTS_DIR} so they
+# survive sandbox teardown and are readable on the host at
+# .jaiph/runs/<run_id>/artifacts/.
+#
+# Usage:
+#   import "jaiphlang/artifacts" as artifacts
+#
+#   workflow default() {
+#     run artifacts.save("./build/output.bin", "build-output.bin")
+#     run artifacts.save_patch("snapshot.patch")
+#   }
+#
+import script "./artifacts.sh" as artifacts
+
+# Copies the file at `local_path` into the artifacts directory under `name`.
+# Returns the absolute path of the saved artifact.
+export workflow save(local_path, name) {
+  return run artifacts("save", local_path, name)
+}
+
+# Runs `git diff` (working tree vs HEAD, excluding .jaiph/) and writes
+# the patch to the artifacts directory under `name`.
+# Returns the absolute path of the saved patch file.
+export workflow save_patch(name) {
+  return run artifacts("save_patch", name)
+}
+
+# Applies a patch file to the current workspace via `git apply`.
+# Useful for replaying artifacts across runs.
+export workflow apply_patch(path) {
+  run artifacts("apply_patch", path)
+}
diff --git a/.jaiph/libs/jaiphlang/artifacts.sh b/.jaiph/libs/jaiphlang/artifacts.sh
new file mode 100755
index 00000000..054a5d25
--- /dev/null
+++ b/.jaiph/libs/jaiphlang/artifacts.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+#
+# Artifacts helper for Jaiph workflows.
+# Reads JAIPH_ARTIFACTS_DIR to locate the writable artifacts directory.
+# Works identically inside the Docker sandbox and on the host.
+#
+set -euo pipefail
+
+ARTIFACTS_DIR="${JAIPH_ARTIFACTS_DIR:?JAIPH_ARTIFACTS_DIR is not set}"
+
+cmd_save() {
+  local src="$1" name="$2"
+  if [[ ! -f "${src}" ]]; then
+    printf 'artifacts save: file not found: %s\n' "${src}" >&2
+    exit 1
+  fi
+  local dest="${ARTIFACTS_DIR}/${name}"
+  mkdir -p "$(dirname "${dest}")"
+  cp -- "${src}" "${dest}"
+  printf '%s' "${dest}"
+}
+
+cmd_save_patch() {
+  local name="$1"
+  local dest="${ARTIFACTS_DIR}/${name}"
+  mkdir -p "$(dirname "${dest}")"
+  # Exclude .jaiph/ from the produced patch — the runtime writes its own
+  # state under .jaiph/ and including it would clobber state on apply.
+  local diff_out
+  diff_out="$(git diff HEAD -- . ':!.jaiph/' 2>/dev/null || true)"
+  if [[ -z "${diff_out}" ]]; then
+    # Also check for untracked files (intent-to-add)
+    git add -N . -- ':!.jaiph/' 2>/dev/null || true
+    diff_out="$(git diff HEAD -- . ':!.jaiph/' 2>/dev/null || true)"
+    # Reset intent-to-add to avoid side effects
+    git reset HEAD -- . 2>/dev/null || true
+  fi
+  if [[ -n "${diff_out}" ]]; then
+    printf '%s\n' "${diff_out}" > "${dest}"
+  else
+    # Empty/clean workspace — create empty file
+    : > "${dest}"
+  fi
+  printf '%s' "${dest}"
+}
+
+cmd_apply_patch() {
+  local patch_path="$1"
+  if [[ ! -f "${patch_path}" ]]; then
+    printf 'artifacts apply_patch: patch file not found: %s\n' "${patch_path}" >&2
+    exit 1
+  fi
+  if [[ ! -s "${patch_path}" ]]; then
+    printf 'artifacts apply_patch: patch file is empty: %s\n' "${patch_path}" >&2
+    exit 1
+  fi
+  git apply "${patch_path}"
+}
+
+# -- dispatch ----------------------------------------------------------------
+cmd="${1:-}"
+shift || true
+
+case "${cmd}" in
+  save)         cmd_save "$@" ;;
+  save_patch)   cmd_save_patch "$@" ;;
+  apply_patch)  cmd_apply_patch "$@" ;;
+  *)
+    printf 'Usage: artifacts <save|save_patch|apply_patch> [args...]\n' >&2
+    exit 1
+    ;;
+esac
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20beb513..6382476d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Feature — Runtime/Library:** Artifacts — runtime mount and `artifacts.jh` library for publishing files out of the sandbox — Workflows can now publish files from inside the Docker sandbox (or host workspace) to a host-readable location at `.jaiph/runs/<run_id>/artifacts/`. The feature is split across two layers. **Runtime layer:** the `NodeWorkflowRuntime` creates the `artifacts/` subdirectory under the run directory before the first workflow step and exposes its path via `JAIPH_ARTIFACTS_DIR` (resolves to `/jaiph/run/artifacts` inside the Docker sandbox, `<host_run_dir>/artifacts` on the host). The existing `/jaiph/run` mount in Docker mode already maps this directory to the host — no new mount is needed. **Library layer:** a new built-in library `.jaiph/libs/jaiphlang/artifacts.jh` (paired with `artifacts.sh`) provides three `export workflow` entries: `save(local_path, name)` copies a file into the artifacts directory; `save_patch(name)` runs `git diff` (excluding `.jaiph/`) and writes the patch; `apply_patch(path)` applies a patch via `git apply`. The library mirrors the existing `queue.jh` / `queue.py` pattern — `import script "./artifacts.sh" as artifacts` with dispatch by subcommand. The `.jaiph/` exclusion in `save_patch` prevents clobbering runtime state when a patch is applied. `JAIPH_ARTIFACTS_DIR` is cleaned from inherited env in `resolveRuntimeEnv` to prevent leaking across nested runs. Runtime unit tests verify `JAIPH_ARTIFACTS_DIR` is set, writable, and exists before workflow execution. E2E test (`129_artifacts_lib.sh`) exercises `save`, `save_patch`, `apply_patch`, clean-workspace patch, and invalid-patch failure. Implementation: `node-workflow-runtime.ts` (artifacts dir creation, env var), `env.ts` (env cleanup), `.jaiph/libs/jaiphlang/artifacts.jh` and `artifacts.sh` (library). Docs updated (`docs/libraries.md`, `docs/artifacts.md`, `docs/configuration.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `Handle<T>` value model for `run async` — `run async ref(args)` now returns a first-class `Handle<T>` value instead of being a fire-and-forget statement. `T` is the same return type the function would have under a synchronous `run`. Capture is supported: `const h = run async ref()`. The handle resolves to the eventual return value on first non-passthrough read (string interpolation, passing as argument to `run`, comparison, conditional branching, match subject, channel send). Passthrough operations (initial capture into `const`, re-assignment) do not force resolution. Once resolved, the handle is replaced in-place by the resolved string value; subsequent reads return the cached value. Workflow exit implicitly joins all remaining unresolved handles created in that scope — this is not an error and preserves backward compatibility. `recover` composition works with `run async`: `run async foo() recover(err) { … }` — the async branch retries using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). `catch` also works for single-shot recovery. The parser accepts `recover(err) { … }` and `catch(err) { … }` after `run async ref(args)` (the previous attempt silently rejected this with a "trailing content" error). There is no fire-and-forget mode — every `run async` creates a handle tracked by the runtime. No explicit `await` keyword — resolution is implicit on first read or at workflow exit. The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword. Implementation: `Handle<T>` registry in `NodeWorkflowRuntime` (`createHandle`, `resolveHandleResult`, `resolveHandleVar`, `resolveHandlesInInput`), `async` flag on `run_capture` const RHS in `src/types.ts`, async capture parsing in `src/parse/const-rhs.ts`, `recover`/`catch` parsing for `run async` in `src/parse/workflows.ts`, formatter round-trip in `src/format/emit.ts`. Spec: `docs/spec-async-handles.md`. Parser, formatter, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
diff --git a/QUEUE.md b/QUEUE.md
index e336f7dd..7d63a603 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,75 +13,6 @@ Process rules:
 
 ***
 
-## Artifacts — runtime mount + `artifacts.jh` lib for publishing files out of the sandbox #dev-ready
-
-**Goal**
-Give workflows a clean, versatile way to publish files from inside the whole-program Docker sandbox to a host-readable location. Split the work across two layers:
-
-* **Runtime layer** (in `src/runtime/`): expose a writable artifacts directory inside the sandbox at a stable path, mapped to `.jaiph/runs/<run_id>/artifacts/` on the host. No new language primitive; the runtime's only job is to mount and to surface the path via env var.
-* **Library layer** (in `.jaiph/libs/jaiphlang/`): ship a new `artifacts.jh` lib (mirroring the existing `queue.jh` / `queue.py` pair) with `export workflow` entries for the common operations. Userspace imports the lib explicitly:
-
-  ```jh
-  import "jaiphlang/artifacts.jh" as artifacts
-
-  workflow default() {
-    run artifacts.save("./build/output.bin", "build-output.bin")
-    run artifacts.save_patch("snapshot.patch")
-  }
-  ```
-
-This keeps the runtime minimal (just a mount), makes the surface library-shaped (so it's discoverable and replaceable), and matches the established `queue.jh` pattern.
-
-**Context (read before starting)**
-
-* Today's whole-program Docker sandbox in `src/runtime/docker.ts` already mounts the run directory writable at `/jaiph/run`. Artifacts will live in a subdirectory of that mount; no new mount is needed.
-* The existing lib pattern is `.jaiph/libs/jaiphlang/queue.jh` paired with `.jaiph/libs/jaiphlang/queue.py` (a small Python helper invoked via `import script ... as queue`). Follow that pattern.
-* The `isolated` keyword is not part of this codebase. This task is about the whole-program Docker sandbox only; no per-call isolation primitive exists or is to be introduced.
-
-**Scope**
-
-**Runtime layer:**
-
-* Ensure `.jaiph/runs/<run_id>/artifacts/` exists on the host before the sandbox starts (`mkdirSync` with `recursive: true`).
-* The existing `/jaiph/run` mount in the container already exposes the artifacts subdirectory implicitly. Verify it does, and that writes inside the container land at `.jaiph/runs/<run_id>/artifacts/` on the host.
-* Surface the in-container artifacts path to userspace via an env var. Suggested name: `JAIPH_ARTIFACTS_DIR` (defaulting to `/jaiph/run/artifacts` in the container, `<host_run_dir>/artifacts` on the host when running without the sandbox). The library reads this env var rather than hardcoding the path.
-* When running on the host (no sandbox), `JAIPH_ARTIFACTS_DIR` points at the host artifacts directory directly so the same lib works.
-
-**Library layer:**
-
-* Add `.jaiph/libs/jaiphlang/artifacts.jh` and `.jaiph/libs/jaiphlang/artifacts.py` (or `.sh` if it stays a one-liner). Mirror the `queue.jh` / `queue.py` shape exactly — no novel patterns.
-* Provide these `export workflow` entries:
-  - `save(local_path, name)` — copies the file at `local_path` into `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path as a string.
-  - `save_patch(name)` — runs `git diff` (working tree vs HEAD) inside the sandbox workspace, writes it to `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path.
-  - `apply_patch(path)` — applies a patch file to the current workspace via `git apply`. Useful for replaying artifacts across runs.
-* The lib must work both inside the sandbox and on the host (when the user runs `jaiph` without the Docker sandbox). The only difference is what `JAIPH_ARTIFACTS_DIR` resolves to.
-* Document that `save_patch` excludes `.jaiph/` from the produced patch (the runtime writes its own state under `.jaiph/`; including it in a patch would clobber state on apply). The exclusion lives in the lib's helper script, not in the runtime, and is documented inline next to the implementation.
-
-**Required tests**
-
-* **Runtime tests**:
-  - `JAIPH_ARTIFACTS_DIR` is set inside the sandbox and points at a writable directory.
-  - `JAIPH_ARTIFACTS_DIR` is set when running on the host (no sandbox) and points at `.jaiph/runs/<run_id>/artifacts/`.
-  - The artifacts directory exists before the sandbox starts (no race where the lib tries to write before the dir exists).
-* **Library tests**:
-  - `artifacts.save(local_path, name)`: file is created at the host path; return value matches that path; file content equals the source.
-  - `artifacts.save_patch(name)`: produces a non-empty patch when the workspace has uncommitted changes; produces an empty (or absent) patch when the workspace is clean; the patch does not reference `.jaiph/` even when `.jaiph/` files have changed.
-  - `artifacts.apply_patch(path)`: applies a previously-saved patch cleanly; fails with a clear error when the patch does not apply.
-* **End-to-end**:
-  - One `.jh` example workflow that imports `jaiphlang/artifacts.jh`, calls `artifacts.save` and `artifacts.save_patch`, runs under the sandbox, and the test asserts both files appear on the host at the expected paths.
-
-**Acceptance criteria**
-
-* `.jaiph/runs/<run_id>/artifacts/` exists, is writable from inside the sandbox, and survives sandbox teardown (it's on the host filesystem via the existing mount).
-* `JAIPH_ARTIFACTS_DIR` is exposed in both sandbox and host execution; the lib reads it rather than hardcoding paths.
-* `.jaiph/libs/jaiphlang/artifacts.jh` ships with `save`, `save_patch`, `apply_patch` as `export workflow` entries, mirroring the `queue.jh` lib shape.
-* The lib works identically inside the sandbox and on the host.
-* `save_patch`'s `.jaiph/` exclusion is documented inline in the helper script.
-* No new runtime language primitive is introduced. The user-facing surface is `import` + workflow calls.
-* The docs-site documentation is updated to describe the artifacts lib alongside the queue lib (`docs/libraries.md` or equivalent).
-
-***
-
 ## Runtime — PTY-based TTY test for `run async` #dev-ready
 
 **Goal**
diff --git a/docs/artifacts.md b/docs/artifacts.md
index 62abad47..241e0593 100644
--- a/docs/artifacts.md
+++ b/docs/artifacts.md
@@ -19,6 +19,7 @@ The runtime uses a UTC-dated hierarchy. Each run gets its own folder: date, then
     <HH-MM-SS>-<source-basename>/       # UTC time + JAIPH_SOURCE_FILE or entry basename
       000001-module__step.out          # stdout capture per step (seq-prefixed)
       000001-module__step.err          # stderr capture (when non-empty)
+      artifacts/                       # user-published artifacts (via jaiphlang/artifacts lib)
       inbox/                           # inbox message files (when channels are used)
       .seq                             # step-sequence counter (kernel/seq-alloc.ts)
       run_summary.jsonl                # durable event timeline
@@ -33,6 +34,7 @@ Sequence prefixes are **monotonic and unique** per run (allocated in the kernel)
 - **`run_summary.jsonl`** — Append-only JSONL timeline: workflow boundaries, step start/end, structured log lines, inbox-related events. Useful for tooling and post-run analysis.
 - **`inbox/`** — When you use channels, message payloads can be reflected as files under the run for inspection (see [Inbox & Dispatch](inbox.md)).
 - **`.seq`** — Internal counter backing the numeric prefixes; you normally do not edit it.
+- **`artifacts/`** — User-published files created by the `jaiphlang/artifacts` library. The runtime creates this directory before the first workflow step runs and exposes its path via `JAIPH_ARTIFACTS_DIR`. Files written here survive sandbox teardown because the directory is on the host filesystem (mapped via the existing `/jaiph/run` mount in Docker mode). See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox).
 - **`workspace.patch`** — (Docker runs only) A `git diff --binary` patch capturing all workspace modifications made during the run. Generated automatically during runtime teardown when Docker sandboxing is enabled and the workspace has changes. The patch is sufficient to review or `git apply` on the host. Omitted when there are no workspace changes. See [Sandboxing — Workspace patch export](sandboxing.md#runtime-behavior).
 
 ## Keeping runs out of git
diff --git a/docs/configuration.md b/docs/configuration.md
index 56fc5266..f377c7fa 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -344,6 +344,8 @@ workflow default() {
 }
 ```
 
+The runtime also sets `JAIPH_ARTIFACTS_DIR` — the absolute path to the writable artifacts directory for the current run (`.jaiph/runs/<run_id>/artifacts/` on the host, `/jaiph/run/artifacts` inside the Docker sandbox). The `jaiphlang/artifacts` library reads this variable; you can also use it directly in scripts. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox).
+
 `JAIPH_DOCKER_*` variables are **not** populated from in-file `runtime.*` inside the workflow runner process. Docker is configured when the CLI spawns the runner (or container). If you need Docker-related variables inside a `script` step, export them yourself or inherit them from the parent shell.
 
 ## Created by `jaiph init`
diff --git a/docs/index.html b/docs/index.html
index ae15903e..fba5fb85 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -480,6 +480,11 @@ <h3>Runtime</h3>
                     executable — a shell script, a Python wrapper, or your own CLI tool — and Jaiph will
                     pipe the prompt via stdin and capture raw stdout as the response. No JSON stream
                     protocol required; just read stdin and print your answer.</p>
+                <p><strong>Artifacts library.</strong> Publish files from inside the sandbox to a host-readable
+                    location with the built-in <code>jaiphlang/artifacts</code> library. <code>artifacts.save</code>
+                    copies a file, <code>artifacts.save_patch</code> exports a git diff, and
+                    <code>artifacts.apply_patch</code> replays it. Works identically in Docker and on the host.
+                    See <a href="libraries#jaiphlangartifacts--publishing-files-out-of-the-sandbox">Libraries</a>.</p>
                 <p><strong>Configuration.</strong> Control behavior with <code>config { ... }</code> blocks
                     at the module level or inside individual workflows for per-workflow overrides, plus environment
                     variables (env wins precedence). See <a href="configuration">Configuration</a> and
diff --git a/docs/libraries.md b/docs/libraries.md
index 23f466f6..c4357d1a 100644
--- a/docs/libraries.md
+++ b/docs/libraries.md
@@ -37,3 +37,46 @@ workflow default() {
 ```
 
 The import resolver tries relative paths first (same as local modules), then falls back to `.jaiph/libs/`. See [CLI — `jaiph install`](cli.md#jaiph-install) for flags, lockfile format, and edge cases.
+
+## Built-in libraries (`jaiphlang/`)
+
+The `jaiphlang/` namespace ships with Jaiph and provides standard workflow utilities. These libraries live under `.jaiph/libs/jaiphlang/` and follow the same `import` + `export workflow` pattern as user-installed libraries.
+
+### `jaiphlang/queue` — task queue management
+
+Reads and modifies a `QUEUE.md` file in the workspace root. See the source at `.jaiph/libs/jaiphlang/queue.jh` for the full API.
+
+### `jaiphlang/artifacts` — publishing files out of the sandbox
+
+Copies files from inside the workflow sandbox (or host workspace) to `.jaiph/runs/<run_id>/artifacts/`, a host-readable location that survives sandbox teardown.
+
+The runtime exposes `JAIPH_ARTIFACTS_DIR` pointing at the writable artifacts directory. The library reads this env var — it works identically inside the Docker sandbox and on the host.
+
+```jaiph
+import "jaiphlang/artifacts" as artifacts
+
+workflow default() {
+  # Copy a file into the artifacts directory under a chosen name.
+  # Returns the absolute path of the saved artifact.
+  const path = run artifacts.save("./build/output.bin", "build-output.bin")
+
+  # Produce a git diff (excluding .jaiph/) and save it as a patch.
+  # Returns the absolute path of the saved patch file.
+  const patch = run artifacts.save_patch("snapshot.patch")
+
+  # Apply a previously-saved patch to the current workspace.
+  run artifacts.apply_patch(patch)
+}
+```
+
+**Exported workflows:**
+
+| Workflow | Description |
+|---|---|
+| `save(local_path, name)` | Copies the file at `local_path` into `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path. |
+| `save_patch(name)` | Runs `git diff` (working tree vs HEAD, excluding `.jaiph/`) and writes it to `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the absolute path. |
+| `apply_patch(path)` | Applies a patch file to the current workspace via `git apply`. Fails with a clear error when the patch does not apply. |
+
+**Notes:**
+- `save_patch` excludes `.jaiph/` from the produced patch. The runtime writes its own state under `.jaiph/`; including it in a patch would clobber state on apply.
+- When the workspace is clean, `save_patch` produces an empty file.
diff --git a/e2e/tests/129_artifacts_lib.sh b/e2e/tests/129_artifacts_lib.sh
new file mode 100755
index 00000000..230f8dcd
--- /dev/null
+++ b/e2e/tests/129_artifacts_lib.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "${ROOT_DIR}/e2e/lib/common.sh"
+trap e2e::cleanup EXIT
+
+e2e::prepare_test_env "artifacts_lib"
+TEST_DIR="${JAIPH_E2E_TEST_DIR}"
+
+# ---------------------------------------------------------------------------
+e2e::section "artifacts lib: save and save_patch"
+# ---------------------------------------------------------------------------
+
+# Set up a git repo so save_patch works
+cd "${TEST_DIR}"
+e2e::git_init
+git config user.email "test@test.com"
+git config user.name "test"
+
+# Install the jaiphlang lib into the test workspace
+mkdir -p "${TEST_DIR}/.jaiph/libs/jaiphlang"
+cp "${ROOT_DIR}/.jaiph/libs/jaiphlang/artifacts.jh" "${TEST_DIR}/.jaiph/libs/jaiphlang/artifacts.jh"
+cp "${ROOT_DIR}/.jaiph/libs/jaiphlang/artifacts.sh" "${TEST_DIR}/.jaiph/libs/jaiphlang/artifacts.sh"
+chmod +x "${TEST_DIR}/.jaiph/libs/jaiphlang/artifacts.sh"
+
+# Create a source file to save as an artifact
+printf 'build-output-content' > "${TEST_DIR}/build_output.txt"
+
+# Create uncommitted changes for save_patch
+printf 'new-file-content\n' > "${TEST_DIR}/tracked.txt"
+git add tracked.txt
+git commit -m "initial" --quiet
+printf 'modified-content\n' > "${TEST_DIR}/tracked.txt"
+
+# Create .jaiph/some-state to verify it's excluded from the patch
+mkdir -p "${TEST_DIR}/.jaiph"
+printf 'runtime-state\n' > "${TEST_DIR}/.jaiph/some_state.txt"
+
+# Create the workflow
+e2e::file "artifacts_e2e.jh" <<'EOF'
+import "jaiphlang/artifacts" as artifacts
+
+workflow default() {
+  const save_path = run artifacts.save("./build_output.txt", "saved-output.txt")
+  log save_path
+  const patch_path = run artifacts.save_patch("workspace.patch")
+  log patch_path
+}
+EOF
+
+# When
+artifacts_out="$(e2e::run "artifacts_e2e.jh")"
+
+# Then — CLI tree output
+# assert_contains: log lines include absolute run-dir paths that vary per invocation;
+# param values include file paths that vary per environment
+e2e::assert_contains "${artifacts_out}" "workflow default" "output contains workflow default"
+e2e::assert_contains "${artifacts_out}" "workflow save" "output contains workflow save"
+e2e::assert_contains "${artifacts_out}" "workflow save_patch" "output contains workflow save_patch"
+e2e::assert_contains "${artifacts_out}" "PASS" "output contains PASS"
+
+# Then — artifacts exist on host
+run_dir="$(e2e::run_dir "artifacts_e2e.jh")"
+artifacts_dir="${run_dir}artifacts"
+
+e2e::assert_file_exists "${artifacts_dir}/saved-output.txt" "saved artifact exists"
+saved_content="$(<"${artifacts_dir}/saved-output.txt")"
+e2e::assert_equals "${saved_content}" "build-output-content" "saved artifact content matches source"
+
+e2e::assert_file_exists "${artifacts_dir}/workspace.patch" "patch artifact exists"
+patch_content="$(<"${artifacts_dir}/workspace.patch")"
+# assert_contains: patch content includes git diff headers with hashes that vary
+e2e::assert_contains "${patch_content}" "modified-content" "patch contains workspace changes"
+
+# Verify .jaiph/ is excluded from the patch
+if [[ "${patch_content}" == *".jaiph/"* ]]; then
+  e2e::fail "patch should exclude .jaiph/ paths"
+fi
+e2e::pass "patch excludes .jaiph/ paths"
+
+# ---------------------------------------------------------------------------
+e2e::section "artifacts lib: apply_patch"
+# ---------------------------------------------------------------------------
+
+# Reset the tracked file to original content
+printf 'new-file-content\n' > "${TEST_DIR}/tracked.txt"
+
+# Apply the previously saved patch
+e2e::file "apply_patch_e2e.jh" <<EOF
+import "jaiphlang/artifacts" as artifacts
+
+workflow default() {
+  run artifacts.apply_patch("${artifacts_dir}/workspace.patch")
+}
+EOF
+
+apply_out="$(e2e::run "apply_patch_e2e.jh")"
+
+# Then — the patch was applied
+applied_content="$(<"${TEST_DIR}/tracked.txt")"
+e2e::assert_equals "${applied_content}" "modified-content" "patch applied successfully"
+
+# ---------------------------------------------------------------------------
+e2e::section "artifacts lib: apply_patch fails on bad patch"
+# ---------------------------------------------------------------------------
+
+printf 'not-a-valid-patch\n' > "${TEST_DIR}/bad.patch"
+
+e2e::file "bad_patch_e2e.jh" <<EOF
+import "jaiphlang/artifacts" as artifacts
+
+workflow default() {
+  run artifacts.apply_patch("${TEST_DIR}/bad.patch")
+}
+EOF
+
+if e2e::run "bad_patch_e2e.jh" >/dev/null 2>&1; then
+  e2e::fail "apply_patch should fail on invalid patch"
+fi
+e2e::pass "apply_patch fails on invalid patch"
+
+# ---------------------------------------------------------------------------
+e2e::section "artifacts lib: save_patch on clean workspace"
+# ---------------------------------------------------------------------------
+
+cd "${TEST_DIR}"
+git checkout -- tracked.txt 2>/dev/null || git restore tracked.txt 2>/dev/null || true
+# Clean untracked files but preserve .jaiph/ (contains the lib we need)
+git clean -fd --exclude=.jaiph 2>/dev/null || true
+
+# Create the workflow file after clean, then commit so workspace is clean
+e2e::file "clean_patch_e2e.jh" <<'EOF'
+import "jaiphlang/artifacts" as artifacts
+
+workflow default() {
+  const patch_path = run artifacts.save_patch("clean.patch")
+  log patch_path
+}
+EOF
+
+git add clean_patch_e2e.jh
+git commit -m "add clean test" --quiet
+
+e2e::run "clean_patch_e2e.jh" >/dev/null
+
+clean_run_dir="$(e2e::run_dir "clean_patch_e2e.jh")"
+clean_patch="${clean_run_dir}artifacts/clean.patch"
+e2e::assert_file_exists "${clean_patch}" "clean patch file exists"
+clean_patch_content="$(<"${clean_patch}")"
+e2e::assert_equals "${clean_patch_content}" "" "clean workspace produces empty patch"
diff --git a/src/cli/run/env.ts b/src/cli/run/env.ts
index 2dbac4ef..0837ec15 100644
--- a/src/cli/run/env.ts
+++ b/src/cli/run/env.ts
@@ -72,6 +72,7 @@ export function resolveRuntimeEnv(
   delete env.BASH_ENV;
   delete env.JAIPH_META_FILE;
   delete env.JAIPH_RUN_DIR;
+  delete env.JAIPH_ARTIFACTS_DIR;
   delete env.JAIPH_PRECEDING_FILES;
   delete env.JAIPH_RUN_SUMMARY_FILE;
   // A parent shell may export JAIPH_SCRIPTS for its own module (e.g. nested `jaiph run` → npm → tests).
diff --git a/src/runtime/kernel/node-workflow-runtime.artifacts.test.ts b/src/runtime/kernel/node-workflow-runtime.artifacts.test.ts
index 38fdc73b..d2dedaa1 100644
--- a/src/runtime/kernel/node-workflow-runtime.artifacts.test.ts
+++ b/src/runtime/kernel/node-workflow-runtime.artifacts.test.ts
@@ -452,6 +452,64 @@ test("NodeWorkflowRuntime: prompt STEP_START params include named vars reference
   }
 });
 
+test("NodeWorkflowRuntime: JAIPH_ARTIFACTS_DIR is set and points at writable artifacts/ subdir", async () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-node-wf-artifacts-dir-"));
+  try {
+    const jh = join(root, "artifacts_env.jh");
+    writeFileSync(jh, 'workflow default() {\n  log "ok"\n}\n');
+
+    const graph = buildRuntimeGraph(jh);
+    const runsDir = join(root, ".jaiph", "runs");
+    const env: NodeJS.ProcessEnv = {
+      ...process.env,
+      JAIPH_TEST_MODE: "1",
+      JAIPH_RUNS_DIR: runsDir,
+    };
+    const runtime = new NodeWorkflowRuntime(graph, { env, cwd: root });
+    const runDir = runtime.getRunDir();
+    const artifactsDir = env.JAIPH_ARTIFACTS_DIR;
+
+    // JAIPH_ARTIFACTS_DIR is set and points at <runDir>/artifacts
+    assert.ok(artifactsDir, "JAIPH_ARTIFACTS_DIR should be set");
+    assert.equal(artifactsDir, join(runDir, "artifacts"));
+
+    // The directory exists before any workflow step runs
+    assert.ok(existsSync(artifactsDir!), "artifacts dir should exist on disk");
+
+    // It is writable
+    const probe = join(artifactsDir!, "probe.txt");
+    writeFileSync(probe, "test");
+    assert.equal(readFileSync(probe, "utf8"), "test");
+
+    runtime.stopHeartbeat();
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
+test("NodeWorkflowRuntime: JAIPH_ARTIFACTS_DIR resolves under .jaiph/runs when JAIPH_RUNS_DIR is unset", async () => {
+  const root = mkdtempSync(join(tmpdir(), "jaiph-node-wf-artifacts-default-"));
+  try {
+    const jh = join(root, "artifacts_default.jh");
+    writeFileSync(jh, 'workflow default() {\n  log "ok"\n}\n');
+
+    const graph = buildRuntimeGraph(jh);
+    const env: NodeJS.ProcessEnv = { ...process.env, JAIPH_TEST_MODE: "1" };
+    delete env.JAIPH_RUNS_DIR;
+    const runtime = new NodeWorkflowRuntime(graph, { env, cwd: root });
+    const artifactsDir = env.JAIPH_ARTIFACTS_DIR;
+
+    assert.ok(artifactsDir, "JAIPH_ARTIFACTS_DIR should be set");
+    assert.ok(artifactsDir!.includes(join(".jaiph", "runs")), "should be under .jaiph/runs");
+    assert.ok(artifactsDir!.endsWith("/artifacts"), "should end with /artifacts");
+    assert.ok(existsSync(artifactsDir!), "artifacts dir should exist");
+
+    runtime.stopHeartbeat();
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
 test("NodeWorkflowRuntime: heartbeat file created at construction, removed on stop", async () => {
   const root = mkdtempSync(join(tmpdir(), "jaiph-node-wf-heartbeat-"));
   try {
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index 76eccbec..b1e5d97e 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -411,11 +411,14 @@ export class NodeWorkflowRuntime {
     const runsRoot = this.resolveRunsRoot();
     this.runDir = join(runsRoot, datePart, `${timePart}-${source}`);
     mkdirSync(this.runDir, { recursive: true });
+    const artifactsDir = join(this.runDir, "artifacts");
+    mkdirSync(artifactsDir, { recursive: true });
     this.summaryFile = join(this.runDir, "run_summary.jsonl");
     writeFileSync(this.summaryFile, "");
     this.env.JAIPH_RUN_SUMMARY_FILE = this.summaryFile;
     this.env.JAIPH_RUN_ID = this.runId;
     this.env.JAIPH_RUN_DIR = this.runDir;
+    this.env.JAIPH_ARTIFACTS_DIR = artifactsDir;
     this.startHeartbeat();
   }
 

From 7a04ab05af859db577dcd959a13c50f991811b3a Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 15:20:27 +0200
Subject: [PATCH 15/38] Test: Add PTY-based E2E test for run async progress
 rendering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exercise the live TTY progress tree path for `run async` workflows
under a real pseudo-terminal. The test spawns `jaiph run` with two
concurrent async branches (branch_a, branch_b), each emitting
deterministic progress events via log and script steps with sleeps.
A Python pty.openpty() harness captures the raw PTY stream and asserts
per-branch events render under correct subscript nodes (₁, ₂), resolved
Handle<T> return values appear in the final frame, and no orphaned ANSI
escape sequences survive after CSI stripping. This closes the regression
gap left by the sync-only 81_tty_progress_tree.sh test.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                        |   1 +
 QUEUE.md                            |  35 ------
 docs/spec-async-handles.md          |   2 +
 docs/testing.md                     |  13 +++
 e2e/tests/131_tty_async_progress.sh | 175 ++++++++++++++++++++++++++++
 5 files changed, 191 insertions(+), 35 deletions(-)
 create mode 100755 e2e/tests/131_tty_async_progress.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6382476d..ed61cc2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Test — E2E/Runtime:** PTY-based TTY test for `run async` progress rendering — New E2E test (`e2e/tests/131_tty_async_progress.sh`) exercises the live progress tree rendering path for `run async` workflows under a real PTY. The test spawns `jaiph run` with a workflow that fans out two concurrent async branches (`run async branch_a()`, `run async branch_b()`), each emitting deterministic progress events over time (log + script steps with sleep). A Python `pty.openpty()` harness captures the raw PTY stream and asserts: (1) each branch's progress events appear under the correct subscript-numbered node (₁, ₂) in the tree, (2) the final frame shows both branches completed with their resolved `Handle<T>` return values (`result-a`, `result-b`), (3) no orphaned ANSI escape sequences survive after CSI stripping, and (4) a `RUNNING` frame was observed during live rendering. The test uses only deterministic steps (no `prompt claude` or external dependencies) and `assert_contains` checks with order-insensitive matching to tolerate async interleaving. This closes a regression-coverage gap — the existing `81_tty_progress_tree.sh` covers synchronous workflows but not the async handle/deferred-resolution render path. Docs updated (`docs/testing.md`, `docs/spec-async-handles.md`).
 - **Feature — Runtime/Library:** Artifacts — runtime mount and `artifacts.jh` library for publishing files out of the sandbox — Workflows can now publish files from inside the Docker sandbox (or host workspace) to a host-readable location at `.jaiph/runs/<run_id>/artifacts/`. The feature is split across two layers. **Runtime layer:** the `NodeWorkflowRuntime` creates the `artifacts/` subdirectory under the run directory before the first workflow step and exposes its path via `JAIPH_ARTIFACTS_DIR` (resolves to `/jaiph/run/artifacts` inside the Docker sandbox, `<host_run_dir>/artifacts` on the host). The existing `/jaiph/run` mount in Docker mode already maps this directory to the host — no new mount is needed. **Library layer:** a new built-in library `.jaiph/libs/jaiphlang/artifacts.jh` (paired with `artifacts.sh`) provides three `export workflow` entries: `save(local_path, name)` copies a file into the artifacts directory; `save_patch(name)` runs `git diff` (excluding `.jaiph/`) and writes the patch; `apply_patch(path)` applies a patch via `git apply`. The library mirrors the existing `queue.jh` / `queue.py` pattern — `import script "./artifacts.sh" as artifacts` with dispatch by subcommand. The `.jaiph/` exclusion in `save_patch` prevents clobbering runtime state when a patch is applied. `JAIPH_ARTIFACTS_DIR` is cleaned from inherited env in `resolveRuntimeEnv` to prevent leaking across nested runs. Runtime unit tests verify `JAIPH_ARTIFACTS_DIR` is set, writable, and exists before workflow execution. E2E test (`129_artifacts_lib.sh`) exercises `save`, `save_patch`, `apply_patch`, clean-workspace patch, and invalid-patch failure. Implementation: `node-workflow-runtime.ts` (artifacts dir creation, env var), `env.ts` (env cleanup), `.jaiph/libs/jaiphlang/artifacts.jh` and `artifacts.sh` (library). Docs updated (`docs/libraries.md`, `docs/artifacts.md`, `docs/configuration.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `Handle<T>` value model for `run async` — `run async ref(args)` now returns a first-class `Handle<T>` value instead of being a fire-and-forget statement. `T` is the same return type the function would have under a synchronous `run`. Capture is supported: `const h = run async ref()`. The handle resolves to the eventual return value on first non-passthrough read (string interpolation, passing as argument to `run`, comparison, conditional branching, match subject, channel send). Passthrough operations (initial capture into `const`, re-assignment) do not force resolution. Once resolved, the handle is replaced in-place by the resolved string value; subsequent reads return the cached value. Workflow exit implicitly joins all remaining unresolved handles created in that scope — this is not an error and preserves backward compatibility. `recover` composition works with `run async`: `run async foo() recover(err) { … }` — the async branch retries using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). `catch` also works for single-shot recovery. The parser accepts `recover(err) { … }` and `catch(err) { … }` after `run async ref(args)` (the previous attempt silently rejected this with a "trailing content" error). There is no fire-and-forget mode — every `run async` creates a handle tracked by the runtime. No explicit `await` keyword — resolution is implicit on first read or at workflow exit. The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword. Implementation: `Handle<T>` registry in `NodeWorkflowRuntime` (`createHandle`, `resolveHandleResult`, `resolveHandleVar`, `resolveHandlesInInput`), `async` flag on `run_capture` const RHS in `src/types.ts`, async capture parsing in `src/parse/const-rhs.ts`, `recover`/`catch` parsing for `run async` in `src/parse/workflows.ts`, formatter round-trip in `src/format/emit.ts`. Spec: `docs/spec-async-handles.md`. Parser, formatter, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`).
diff --git a/QUEUE.md b/QUEUE.md
index 7d63a603..2f82205a 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,41 +13,6 @@ Process rules:
 
 ***
 
-## Runtime — PTY-based TTY test for `run async` #dev-ready
-
-**Goal**
-Live progress for `run async` (with handles, deferred resolution, multi-branch fan-out without isolation) takes a different render path than synchronous steps. Close the regression-coverage gap by exercising that path through a real PTY.
-
-**Context (read before starting)**
-
-`e2e/tests/81_tty_progress_tree.sh` already uses Python's `pty.openpty()` to drive `jaiph run` under a real TTY and asserts on the rendered progress frames. It covers non-async workflows. There is no equivalent for `run async`. The host progress renderer takes a different path for async (handles, deferred resolution, multiple in-flight calls competing for the live frame), and that path has been broken before without any test catching it.
-
-**Scope**
-
-* Add an e2e test (sibling of `e2e/tests/81_tty_progress_tree.sh`) that:
-  * spawns `jaiph run` under a real PTY,
-  * exercises a workflow that uses `run async branch()` with at least two concurrent async calls,
-  * each branch emits multiple progress events over time (use a deterministic step like a sleep loop with `print` calls — do not depend on `prompt claude` or any other non-deterministic step),
-  * captures the PTY output and asserts:
-    1. each branch's progress events appear under that branch's node in the tree as they happen,
-    2. the final frame shows both branches as completed with their resolved return values,
-    3. no ANSI corruption (orphaned escape sequences, stray cursor moves outside the rendered region).
-* The test must fail today against any regression that batches async progress events at branch completion, drops them, or scrambles the frame.
-
-**Non-goals**
-
-* Do not test `prompt claude` or any non-deterministic step. Branches must emit synthetic, time-spaced events.
-* Do not assert on exact frame timing; assert on order and presence within a generous timeout.
-* No `isolated` variant — that keyword is not part of this codebase.
-
-**Acceptance criteria**
-
-* New test lives next to `e2e/tests/81_tty_progress_tree.sh` and follows the same shell-driving-Python-PTY pattern.
-* The test passes on a green build and fails when the live-progress path for `run async` regresses.
-* Test runs as part of the standard e2e suite (no separate invocation).
-
-***
-
 ## Cleanup — delete top-level debug cruft and harden `.gitignore` #dev-ready
 
 **Goal**
diff --git a/docs/spec-async-handles.md b/docs/spec-async-handles.md
index 57eb87d8..b5571b86 100644
--- a/docs/spec-async-handles.md
+++ b/docs/spec-async-handles.md
@@ -103,6 +103,8 @@ Async handles preserve the existing async progress/event visibility model:
 
 Handle resolution does not emit additional events beyond what the async branch already emits.
 
+The live TTY rendering path for async branches is covered by a dedicated PTY-based E2E test (`e2e/tests/131_tty_async_progress.sh`). It spawns `jaiph run` under a real pseudo-terminal with two concurrent async branches and asserts that per-branch events render under the correct subscript node, that resolved return values appear in the final frame, and that no ANSI escape corruption occurs. See [Testing — PTY-based TTY tests](testing.md#pty-based-tty-tests) for details.
+
 ## Constraints
 
 - `run async` is only allowed in workflows, not in rules.
diff --git a/docs/testing.md b/docs/testing.md
index 474aa9a9..2571ca96 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -360,6 +360,19 @@ For concurrency-sensitive behavior (for example parallel inbox dispatch), the re
 
 See `e2e/tests/91_inbox_dispatch.sh`, `e2e/tests/93_inbox_stress.sh`, and `e2e/tests/94_parallel_shell_steps.sh` for examples.
 
+## PTY-based TTY tests
+
+Some CLI behavior only activates when stdout is a real TTY — the live progress tree with ANSI redraws, for example. These tests use Python's `pty.openpty()` to spawn `jaiph run` under a pseudo-terminal, capture the raw byte stream, and assert on the rendered output.
+
+Two PTY tests exist today:
+
+| Test file | What it covers |
+|-----------|----------------|
+| `e2e/tests/81_tty_progress_tree.sh` | Synchronous workflow progress rendering — verifies the tree structure, step timing, and PASS/FAIL markers under a real TTY. |
+| `e2e/tests/131_tty_async_progress.sh` | Async workflow progress rendering — verifies that `run async` branches (with `Handle<T>` deferred resolution) render per-branch progress events under subscript-numbered nodes (₁, ₂), that both branches show resolved return values in the final frame, and that no orphaned ANSI escape sequences appear. |
+
+Both tests require Python 3 and use only deterministic, non-LLM steps (sleep loops, `log`, scripts) so results are reproducible. Assertions use `assert_contains` with order-insensitive matching because async interleaving and PTY redraws make exact full-output comparison infeasible.
+
 ## E2E testing
 
 Shell harnesses and CI expectations for the full repo are described in [Contributing — E2E testing](contributing.md#e2e-testing).
diff --git a/e2e/tests/131_tty_async_progress.sh b/e2e/tests/131_tty_async_progress.sh
new file mode 100755
index 00000000..415e9c01
--- /dev/null
+++ b/e2e/tests/131_tty_async_progress.sh
@@ -0,0 +1,175 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "${ROOT_DIR}/e2e/lib/common.sh"
+trap e2e::cleanup EXIT
+
+e2e::prepare_test_env "tty_async_progress"
+TEST_DIR="${JAIPH_E2E_TEST_DIR}"
+
+e2e::section "TTY run async renders per-branch progress events in real time"
+
+if ! command -v python3 >/dev/null 2>&1; then
+  e2e::fail "python3 is required for PTY TTY test (e2e/tests/131_tty_async_progress.sh)"
+fi
+
+# Given — two async branches, each emitting multiple progress events over time
+e2e::file "tty_async.jh" <<'EOF'
+script slow_a = `sleep 1 && echo "a-script-done"`
+
+script slow_b = `sleep 1 && echo "b-script-done"`
+
+workflow branch_a() {
+  log "a-start"
+  run slow_a()
+  log "a-end"
+  return "result-a"
+}
+
+workflow branch_b() {
+  log "b-start"
+  run slow_b()
+  log "b-end"
+  return "result-b"
+}
+
+workflow default() {
+  const ha = run async branch_a()
+  const hb = run async branch_b()
+  log ha
+  log hb
+}
+EOF
+
+# Spawn jaiph run under a real PTY so the CLI takes the TTY rendering path.
+set +e
+tty_out="$(
+  python3 - "${TEST_DIR}/tty_async.jh" <<'PY'
+import os
+import pty
+import re
+import select
+import subprocess
+import sys
+
+workflow_path = sys.argv[1]
+cmd = ["jaiph", "run", workflow_path]
+
+master_fd, slave_fd = pty.openpty()
+proc = subprocess.Popen(cmd, stdin=slave_fd, stdout=slave_fd, stderr=slave_fd, close_fds=True)
+os.close(slave_fd)
+
+chunks = []
+while True:
+    ready, _, _ = select.select([master_fd], [], [], 0.1)
+    if master_fd in ready:
+        try:
+            data = os.read(master_fd, 4096)
+        except OSError:
+            data = b""
+        if data:
+            chunks.append(data)
+    if proc.poll() is not None:
+        while True:
+            try:
+                data = os.read(master_fd, 4096)
+            except OSError:
+                break
+            if not data:
+                break
+            chunks.append(data)
+        break
+
+os.close(master_fd)
+captured = b"".join(chunks)
+text = captured.decode("utf-8", errors="ignore")
+# Normalize for robust detection across PTY redraw/control sequences
+text = text.replace("\r", "\n")
+clean = re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", text)
+
+# Check that RUNNING frame was observed during live render
+running_seen = "RUNNING workflow default" in clean
+sys.stdout.write(f"__JAIPH_TTY_RUNNING_SEEN__={'1' if running_seen else '0'}\n")
+
+# Check for orphaned ANSI escape sequences after stripping known CSI patterns.
+# A well-formed stream should have no leftover \x1b after CSI removal.
+orphaned_esc = "\x1b" in clean
+sys.stdout.write(f"__JAIPH_TTY_ANSI_CLEAN__={'1' if not orphaned_esc else '0'}\n")
+
+sys.stdout.buffer.write(captured)
+sys.exit(proc.returncode if proc.returncode is not None else 1)
+PY
+)"
+tty_status=$?
+set -e
+
+# Then — exit code
+e2e::assert_equals "${tty_status}" "0" "jaiph run async exits 0 in PTY"
+
+normalized_input="${tty_out//$'\r'/$'\n'}"
+normalized="$(e2e::normalize_output "${normalized_input}")"
+
+# assert_contains: PTY output includes ANSI escape sequences and redraw frames that make exact match infeasible
+e2e::assert_contains "${normalized}" "__JAIPH_TTY_RUNNING_SEEN__=1" "TTY stream observed RUNNING frame during async live render"
+
+# assert_contains: orphaned-escape check is a single flag extracted from the PTY stream
+e2e::assert_contains "${normalized}" "__JAIPH_TTY_ANSI_CLEAN__=1" "No orphaned ANSI escape sequences in PTY output"
+
+# --- Per-branch progress events appear under correct branch nodes ---
+
+# assert_contains: async interleaving order is nondeterministic in live PTY output
+e2e::assert_contains "${normalized}" "workflow branch_a" "branch_a appears in progress tree"
+e2e::assert_contains "${normalized}" "workflow branch_b" "branch_b appears in progress tree"
+
+# Subscript ₁ prefixes branch_a events, ₂ prefixes branch_b events
+# assert_contains: PTY redraws make exact full-output match infeasible
+e2e::assert_contains "${normalized}" "₁" "branch ₁ subscript present"
+e2e::assert_contains "${normalized}" "₂" "branch ₂ subscript present"
+
+# Log events from each branch appear with their branch subscript
+# assert_contains: async interleaving is nondeterministic
+e2e::assert_contains "${normalized}" "a-start" "branch_a log a-start present"
+e2e::assert_contains "${normalized}" "a-end" "branch_a log a-end present"
+e2e::assert_contains "${normalized}" "b-start" "branch_b log b-start present"
+e2e::assert_contains "${normalized}" "b-end" "branch_b log b-end present"
+
+# Script steps appear under their branches
+# assert_contains: async interleaving is nondeterministic
+e2e::assert_contains "${normalized}" "script slow_a" "script slow_a appears in progress tree"
+e2e::assert_contains "${normalized}" "script slow_b" "script slow_b appears in progress tree"
+
+# --- Final frame: both branches completed with resolved return values ---
+
+# assert_contains: PTY redraws make exact match infeasible
+e2e::assert_contains "${normalized}" "result-a" "handle ha resolved to result-a"
+e2e::assert_contains "${normalized}" "result-b" "handle hb resolved to result-b"
+
+# Both branches show completion markers
+# assert_contains: PTY redraws make exact match infeasible
+e2e::assert_contains "${normalized}" "workflow branch_a (<time>)" "branch_a completed with timing"
+e2e::assert_contains "${normalized}" "workflow branch_b (<time>)" "branch_b completed with timing"
+
+# Overall PASS
+# assert_contains: PTY redraws make exact match infeasible
+e2e::assert_contains "${normalized}" "PASS workflow default" "workflow default passed"
+
+# Canonicalize dynamic TTY refreshes and verify stable tree structure.
+# Extract only the lines we can stably match regardless of async interleaving order.
+tree_projection="$(
+  printf '%s\n' "${normalized}" | awk '
+    /^Jaiph: Running tty_async\.jh$/ { print; next }
+    /^workflow default$/ { print; next }
+    /^ .₁.+ workflow branch_a \(<time>\)$/ { print; next }
+    /^ .₂.+ workflow branch_b \(<time>\)$/ { print; next }
+    /PASS workflow default/ { print; next }
+  '
+)"
+
+# assert_contains: we extract stable subset lines; the full projection order depends on async timing
+e2e::assert_contains "${tree_projection}" "Jaiph: Running tty_async.jh" "tree projection: header"
+e2e::assert_contains "${tree_projection}" "workflow default" "tree projection: root workflow"
+e2e::assert_contains "${tree_projection}" "PASS workflow default" "tree projection: final PASS"
+
+e2e::pass "TTY async progress renders per-branch events correctly"

From 9052ad0e060396fda7d13914da313287dfdfacc3 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 18:54:28 +0200
Subject: [PATCH 16/38] Attempt to fix CI

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 .gitignore                                  |  10 +-
 QUEUE.md.tmp.4951                           | 229 --------------------
 safe_name                                   |   6 -
 src/runtime/docker.test.ts                  |   6 +-
 src/runtime/docker.ts                       | 132 ++++++++---
 src/runtime/kernel/node-workflow-runtime.ts |   4 +-
 6 files changed, 111 insertions(+), 276 deletions(-)
 delete mode 100644 QUEUE.md.tmp.4951
 delete mode 100644 safe_name

diff --git a/.gitignore b/.gitignore
index 1673153f..b15d9eec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,12 @@ e2e/ensure_fail.sh
 e2e/current_branch.sh
 e2e/assign_capture.sh
 
-.obsidian/
\ No newline at end of file
+.obsidian/
+
+# debug / temp directories (never commit)
+docker-*/
+nested-*/
+overlay-*/
+local-*/
+.tmp*/
+QUEUE.md.tmp.*
\ No newline at end of file
diff --git a/QUEUE.md.tmp.4951 b/QUEUE.md.tmp.4951
deleted file mode 100644
index f97c88a2..00000000
--- a/QUEUE.md.tmp.4951
+++ /dev/null
@@ -1,229 +0,0 @@
-# Jaiph Improvement Queue (Hard Rewrite Track)
-
-Process rules:
-
-1. Tasks are executed top-to-bottom.
-2. The first `##` section is always the current task.
-3. When a task is completed, remove that section entirely.
-4. Every task must be standalone: no hidden assumptions, no "read prior task" dependency.
-5. This queue assumes **hard rewrite semantics**:
-   - breaking changes are allowed,
-   - backward compatibility is **not** a design goal unless a task explicitly says otherwise.
-
----
-
-## Libs — project-scoped lib install + import resolution #dev-ready
-
-**Goal**  
-Add project-scoped library support. Libraries are git repos cloned into `<project>/.jaiph/libs/<name>/`. A new `jaiph install` CLI command manages installation. The import resolver gains a fallback that resolves lib paths from the workspace `.jaiph/libs/` directory. A lockfile tracks installed libs for reproducibility.
-
-**Part 1: `jaiph install <url[@version]>` CLI command**
-
-Add `src/cli/commands/install.ts` (follow the pattern of `init.ts`, `run.ts`).
-
-Behavior:
-- `jaiph install <repo-url>` — clone repo into `.jaiph/libs/<repo-name>/` (shallow: `--depth 1`).
-- `jaiph install <repo-url>@<version>` — clone at specific tag/branch (`--depth 1 --branch <version>`).
-- `jaiph install` (no args) — read `.jaiph/libs.lock` and install all entries.
-- Repo name is derived from the URL: last path segment, stripped of `.git` suffix (e.g. `github.com/you/queue-lib.git` → `queue-lib`).
-- If `.jaiph/libs/<name>/` already exists, skip (or `--force` to re-clone).
-- After install, upsert the entry in `.jaiph/libs.lock`.
-
-Lockfile format (`.jaiph/libs.lock`, JSON):
-```json
-{
-  "libs": [
-    { "name": "queue-lib", "url": "https://github.com/you/queue-lib.git", "version": "v1.0" }
-  ]
-}
-```
-
-**Part 2: Lib-aware import resolution**
-
-Currently `import "path" as alias` resolves relative to the importing file only (`resolveImportPath` in `src/transpile/resolve.ts`). Add a fallback: if relative resolution fails, check `<workspace-root>/.jaiph/libs/`.
-
-```jaiph
-import "queue-lib/queue" as queue   # resolves to .jaiph/libs/queue-lib/queue.jh
-```
-
-Import paths with a `/` that don't resolve relatively are split as `<lib-name>/<path-inside-lib>` and resolved to `<workspace>/.jaiph/libs/<lib-name>/<path-inside-lib>.jh`.
-
-Resolution order:
-1. Relative to importing file (existing behavior — unchanged).
-2. `<workspace-root>/.jaiph/libs/<first-segment>/<rest>.jh` (new fallback).
-
-The workspace root is already available via `detectWorkspaceRoot()` in `src/cli/shared/paths.ts`. The resolver needs the workspace root passed in (or detected). Missing lib deps fail at compile time — the existing `E_IMPORT_NOT_FOUND` error in `validate.ts` (line 273) handles this; no change needed there.
-
-**Part 3: `export script` support**
-
-Currently `export` works on `workflow` and `rule`. Verify it also works on `script` in the parser (`src/parser.ts`) and validator. If not, add it — libs need to export scripts.
-
-**Part 4: `queue.jh` as first lib**
-
-Create a standalone repo (or just the file for now in `lib/queue.jh` for testing). A markdown-section-based task queue manager backed by `QUEUE_DIR` env var. One file per project. `## heading` sections are tasks. Hashtags in headings (`#dev-ready`, `#bug`) are filterable tags.
-
-Exports:
-- `script get(project, tag?)` — return first `##` section, optionally filtered by `#tag`
-- `script list(project?, tag?)` — list section headings with tags; `--all` across projects
-- `script add(project, content)` — prepend a task section
-- `script complete(project)` — remove the first `##` section
-- `workflow next_task(project, tag)` — wrapper: get + return
-- `rule has_tasks(project)` — check if project has any sections
-
-**Part 5: Hashtag migration**
-
-Migrate `QUEUE.md` headings from `<!-- dev-ready -->` HTML comments to `#dev-ready` hashtags. This makes tags visible in Obsidian's native tag search/filter/graph.
-
-**Context**
-
-- CLI commands: `src/cli/commands/` — `init.ts`, `run.ts` for pattern reference.
-- Import resolver: `src/transpile/resolve.ts` — `resolveImportPath()` is the function to extend.
-- Validate: `src/transpile/validate.ts` line 271-281 — `E_IMPORT_NOT_FOUND` already fires for missing resolved paths. No change needed.
-- Workspace root: `src/cli/shared/paths.ts` — `detectWorkspaceRoot()`.
-- All call-sites of `resolveImportPath`: `build.ts`, `validate.ts`, `graph.ts`, `transpiler.ts`, `paths.ts`, `compiler-test-runner.ts` — the new lib-aware resolver must be wired in at each.
-- `export` keyword: `src/parser.ts` — verify `script` is supported alongside `workflow` and `rule`.
-- Existing import tests: `e2e/tests/116_cross_file_import.sh`, `e2e/tests/118_import_not_found.sh`.
-
-**Acceptance criteria**
-
-- `jaiph install <url[@version]>` clones into `.jaiph/libs/<name>/` and writes `.jaiph/libs.lock`.
-- `jaiph install` (no args) restores from lockfile.
-- `import "queue-lib/queue" as queue` resolves to `.jaiph/libs/queue-lib/queue.jh`.
-- Relative-path imports are unaffected (resolution order: relative first, libs second).
-- `export script` works (parser + validator).
-- `queue.jh` lib provides `get`, `list`, `add`, `complete`, `next_task`, `has_tasks`.
-- E2E test: install a lib, import it, call an exported script/workflow.
-- Existing E2E import tests still pass.
-- `QUEUE.md` hashtag migration: `<!-- dev-ready -->` → `#dev-ready` across all headings.
-
----
-
-## Runtime — credential proxy for Docker mode
-
-**Goal**  
-Containers should never hold real API keys. Implement a host-side HTTP proxy (the "Phantom Token" pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key before forwarding upstream. The agent inside the container literally cannot leak the real key — it never has it.
-
-**Design**
-
-1. **Host-side proxy** — a lightweight `http.createServer` bound to `127.0.0.1:<port>` (macOS/WSL2) or the `docker0` bridge IP (Linux). Receives requests from the container, swaps `x-api-key: placeholder` with the real key from host env, forwards to the upstream API, pipes the response back (including streaming SSE).
-2. **Container env injection** — instead of passing `ANTHROPIC_API_KEY=$real_key` into `docker run`, pass `ANTHROPIC_API_KEY=placeholder` + `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>`.
-3. **Multi-backend routing** — Jaiph supports Claude and Cursor backends. Each backend's CLI must respect a base URL override env var. `claude` CLI supports `ANTHROPIC_BASE_URL`; `cursor-agent` may not — needs investigation.
-4. **Lifecycle** — proxy starts before the first Docker container launch, shuts down after the last container exits or on Jaiph process exit.
-
-**Context**
-
-- Pattern reference: [NanoClaw's credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/) — same approach, independently arrived at.
-- Current Docker execution path: `src/runtime/kernel/` — Docker run/exec logic, env var forwarding.
-- Dockerfile: `.jaiph/Dockerfile` — container image setup.
-- Backend CLI invocation: `src/runtime/kernel/node-workflow-runtime.ts` — where `claude` / `cursor-agent` commands are constructed with env vars.
-
-**Open questions**
-
-- Does `cursor-agent` support a base URL override? If not, the proxy pattern may require a wrapper script or LD_PRELOAD-based interception inside the container.
-- Single port with path-based routing vs one port per backend?
-- Should the proxy also enforce rate limits or audit-log API calls?
-
-**Acceptance criteria**
-
-- Host-side proxy starts automatically when Docker mode is active.
-- Containers receive only placeholder credentials — no real API keys in container env.
-- `claude` CLI calls from inside Docker succeed via the proxy.
-- Proxy handles streaming responses (SSE) correctly.
-- Real keys never appear in container logs, env dumps, or process listings.
-- Platform-specific host address resolution works (macOS, Linux).
-
----
-
-## Runtime — harden Docker execution environment
-
-**Goal**  
-Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, image supply chain, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage.
-
-**Context**
-
-- Docker runtime: `src/runtime/kernel/` — look for `docker.ts` or Docker-related logic in the run path.
-- E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`.
-- Config: `runtime.docker_enabled`, `runtime.docker_timeout`, `runtime.workspace` keys in `src/config.ts` and metadata parsing.
-
-**Acceptance criteria**
-
-- Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / isn't protecting against.
-- Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist, safer defaults) with unit tests.
-- No silent widen of host access without opt-in.
-
----
-
-## Runtime — default Docker when not CI or unsafe #dev-ready
-
-**Goal**  
-When the user has not opted into "unsafe" local execution, workflows should run in Docker by default. **Default `runtime.docker_enabled` to on** only when **neither** `CI=true` **nor** `JAIPH_UNSAFE=true` is set in the environment. If either is set, default Docker to **off** unless explicitly overridden via `runtime.docker_enabled` / `JAIPH_DOCKER_ENABLED`.
-
-Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted; document it next to `CI`.
-
-**Context**
-
-- Config resolution: `src/config.ts` — `resolveDockerConfig()` or equivalent; where `runtime.docker_enabled` default is determined.
-- Env precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then CI / unsafe default rule.
-- E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh` — may need env setup adjustments.
-
-**Acceptance criteria**
-
-- `resolveDockerConfig()` (and any CLI preflight messaging) implements the precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then apply CI / unsafe default rule.
-- Unit tests for env combinations: plain local → Docker default on; `CI=true` → default off; `JAIPH_UNSAFE=true` → default off; both unset with explicit `JAIPH_DOCKER_ENABLED=false` → off.
-- `CHANGELOG` + sandboxing / configuration docs updated.
-
----
-
-## `jaiph serve` — expose workflows as an MCP server #dev-ready
-
-**Goal**  
-Add a `jaiph serve <file.jh>` command that starts a stdio MCP server. Each top-level workflow in the file becomes a callable MCP tool. This lets any MCP client (Cursor, Claude Desktop, custom agents) invoke Jaiph workflows directly.
-
-**Context**
-
-- MCP (Model Context Protocol) uses JSON-RPC 2.0 over stdio. A server must handle `initialize`, `tools/list`, and `tools/call`.
-- Jaiph already has a runtime (`src/runtime/kernel/node-workflow-runtime.ts`) that can execute workflows and capture output.
-- The `@modelcontextprotocol/sdk` npm package provides a Node.js server implementation, but the protocol is simple enough to implement directly (~200 lines for stdio JSON-RPC + the three methods).
-
-**Phase 1 — single text input (this task)**
-
-Each workflow becomes a tool with a single `input` string parameter:
-
-```json
-{
-  "name": "analyze_gaps",
-  "description": "workflow analyze_gaps from qa.jh",
-  "inputSchema": {
-    "type": "object",
-    "properties": {
-      "input": { "type": "string", "description": "Text input passed to the workflow" }
-    }
-  }
-}
-```
-
-The `input` value is injected into the workflow environment as `JAIPH_MCP_INPUT` (accessible via `${input}` interpolation or `$JAIPH_MCP_INPUT` in scripts). The tool response is the workflow's captured output (log messages + prompt results).
-
-**Phase 2 — typed parameters (future task)**
-
-Extend the language with workflow parameters: `workflow analyze(file: string, depth: number) { ... }`. These map directly to the tool's `inputSchema`. Not in scope for this task.
-
-**Scope**
-
-1. **CLI command** (`src/cli/commands/serve.ts`): add `jaiph serve <file.jh>` that parses the file, starts a stdio JSON-RPC server, and handles `initialize`, `tools/list`, `tools/call`.
-2. **Tool listing**: read the parsed module's `workflows` array. Each workflow becomes a tool entry with `name` = workflow name, `description` = `"workflow <name> from <filename>"`, `inputSchema` = single `input` string.
-3. **Tool execution**: on `tools/call`, run the named workflow using the existing runtime. Capture all output (logs, prompt results). Return as `content: [{ type: "text", text: output }]`.
-4. **Error handling**: if the workflow fails, return `isError: true` with the error message.
-5. **Config inheritance**: the `.jh` file's `config { ... }` block applies normally (backend, model, etc.).
-6. **E2E test**: a test that starts `jaiph serve` with a simple workflow, sends JSON-RPC messages via stdin, and verifies the tool list and a tool call response.
-7. **Docs**: add a section to `docs/index.html` and `docs/jaiph-skill.md` about MCP server mode.
-
-**Acceptance criteria**
-
-- `jaiph serve examples/greeting.jh` starts a stdio MCP server.
-- `tools/list` returns one tool per workflow.
-- `tools/call` executes the workflow and returns its output.
-- Errors produce `isError: true` responses (no server crash).
-- E2E test passes.
-
----
diff --git a/safe_name b/safe_name
deleted file mode 100644
index 40cbcc24..00000000
--- a/safe_name
+++ /dev/null
@@ -1,6 +0,0 @@
-Script started on Thu Apr  9 16:59:55 2026
-Command: =
-script: =: No such file or directory
-
-Command exit status: 1
-Script done on Thu Apr  9 16:59:55 2026
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 23dd7c06..c6c7ef36 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -437,9 +437,11 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     assert.ok(content.includes("fuse-overlayfs"));
     assert.ok(content.includes("workspace overlay unavailable"));
     assert.ok(content.includes("using copy fallback"));
-    assert.ok(content.includes('rsync -a --delete "$LOWER"/ "$MERGED"/'));
+    assert.ok(content.includes("mktemp -d /tmp/jaiph-workspace."));
+    assert.ok(content.includes("rewrite_workspace_path()"));
+    assert.ok(content.includes('rsync -a --delete --no-owner --no-group'));
     assert.ok(content.includes("mktemp \"$MERGED/.jaiph-overlay-probe.XXXXXX\""));
-    assert.ok(content.includes('exec "$@"'));
+    assert.ok(content.includes('exec "${rewritten_args[@]}"'));
   } finally {
     rmSync(dirname(scriptPath), { recursive: true, force: true });
   }
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index f0b7606d..69b16997 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -315,7 +315,39 @@ LOWER=/jaiph/workspace-ro
 UPPER=/tmp/overlay-upper
 WORK=/tmp/overlay-work
 MERGED=/jaiph/workspace
+RUNTIME_WORKSPACE="$MERGED"
 mkdir -p "$UPPER" "$WORK" "$MERGED"
+
+rewrite_workspace_path() {
+  local value="$1"
+  if [ "$RUNTIME_WORKSPACE" = "$MERGED" ]; then
+    printf '%s' "$value"
+    return
+  fi
+  case "$value" in
+    "$MERGED")
+      printf '%s' "$RUNTIME_WORKSPACE"
+      ;;
+    "$MERGED"/*)
+      printf '%s' "$RUNTIME_WORKSPACE"\${value#$MERGED}
+      ;;
+    *)
+      printf '%s' "$value"
+      ;;
+  esac
+}
+
+copy_workspace_with_rsync() {
+  local target="$1"
+  rsync -a --delete --no-owner --no-group --chmod=Du+rwx,Dgo+rx,Fu+rw,Fgo+r "$LOWER"/ "$target"/
+}
+
+copy_workspace_with_cp() {
+  local target="$1"
+  cp -a --no-preserve=ownership "$LOWER"/. "$target"/
+  chmod -R u+rwX "$target" 2>/dev/null || true
+}
+
 overlay_ok=0
 overlay_reason=""
 if command -v fuse-overlayfs >/dev/null 2>&1 && [ -e /dev/fuse ]; then
@@ -334,46 +366,74 @@ else
   overlay_reason="fuse-overlayfs unavailable or /dev/fuse missing"
 fi
 if [ "$overlay_ok" -ne 1 ]; then
-  if command -v rsync >/dev/null 2>&1; then
-    if rsync -a --delete "$LOWER"/ "$MERGED"/ 2>/tmp/jaiph-workspace-copy.err; then
-      printf 'jaiph docker: workspace overlay unavailable; using copy fallback at /jaiph/workspace' >&2
-      if [ -n "$overlay_reason" ]; then
-        printf ' (%s)' "$overlay_reason" >&2
+  tmp_workspace="$(mktemp -d /tmp/jaiph-workspace.XXXXXX 2>/dev/null || true)"
+  if [ -n "$tmp_workspace" ]; then
+    if command -v rsync >/dev/null 2>&1; then
+      if copy_workspace_with_rsync "$tmp_workspace" 2>/tmp/jaiph-workspace-copy.err; then
+        RUNTIME_WORKSPACE="$tmp_workspace"
+        printf 'jaiph docker: workspace overlay unavailable; using copy fallback at %s' "$RUNTIME_WORKSPACE" >&2
+        if [ -n "$overlay_reason" ]; then
+          printf ' (%s)' "$overlay_reason" >&2
+        fi
+        printf '\n' >&2
+        overlay_ok=1
+      else
+        copy_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-copy.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
+        rm -rf "$tmp_workspace"
+        printf 'jaiph docker: workspace overlay unavailable and copy fallback failed; container workspace may be incomplete' >&2
+        if [ -n "$overlay_reason" ]; then
+          printf ' (%s)' "$overlay_reason" >&2
+        fi
+        if [ -n "$copy_reason" ]; then
+          printf ' [copy fallback: %s]' "$copy_reason" >&2
+        fi
+        printf '\n' >&2
       fi
-      printf '\n' >&2
-      overlay_ok=1
     else
-      copy_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-copy.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
-      printf 'jaiph docker: workspace overlay unavailable and copy fallback failed; /jaiph/workspace may be incomplete' >&2
-      if [ -n "$overlay_reason" ]; then
-        printf ' (%s)' "$overlay_reason" >&2
-      fi
-      if [ -n "$copy_reason" ]; then
-        printf ' [copy fallback: %s]' "$copy_reason" >&2
+      if copy_workspace_with_cp "$tmp_workspace" 2>/tmp/jaiph-workspace-cp.err; then
+        RUNTIME_WORKSPACE="$tmp_workspace"
+        printf 'jaiph docker: workspace overlay unavailable; using cp fallback at %s' "$RUNTIME_WORKSPACE" >&2
+        if [ -n "$overlay_reason" ]; then
+          printf ' (%s)' "$overlay_reason" >&2
+        fi
+        printf '\n' >&2
+        overlay_ok=1
+      else
+        cp_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-cp.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
+        rm -rf "$tmp_workspace"
+        printf 'jaiph docker: workspace overlay unavailable and copy fallbacks are unavailable; container workspace may be incomplete' >&2
+        if [ -n "$overlay_reason" ]; then
+          printf ' (%s)' "$overlay_reason" >&2
+        fi
+        if [ -n "$cp_reason" ]; then
+          printf ' [cp fallback: %s]' "$cp_reason" >&2
+        fi
+        printf '\n' >&2
       fi
-      printf '\n' >&2
     fi
   else
-    if cp -a "$LOWER"/. "$MERGED"/ 2>/tmp/jaiph-workspace-cp.err; then
-      printf 'jaiph docker: workspace overlay unavailable; using cp fallback at /jaiph/workspace' >&2
-      if [ -n "$overlay_reason" ]; then
-        printf ' (%s)' "$overlay_reason" >&2
-      fi
-      printf '\n' >&2
-      overlay_ok=1
-    else
-      cp_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-cp.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
-      printf 'jaiph docker: workspace overlay unavailable and copy fallbacks are unavailable; /jaiph/workspace may be incomplete' >&2
-      if [ -n "$overlay_reason" ]; then
-        printf ' (%s)' "$overlay_reason" >&2
-      fi
-      if [ -n "$cp_reason" ]; then
-        printf ' [cp fallback: %s]' "$cp_reason" >&2
-      fi
-      printf '\n' >&2
+    printf 'jaiph docker: workspace overlay unavailable and temp workspace allocation failed; container workspace may be incomplete' >&2
+    if [ -n "$overlay_reason" ]; then
+      printf ' (%s)' "$overlay_reason" >&2
     fi
+    printf '\n' >&2
   fi
 fi
+
+if [ "$RUNTIME_WORKSPACE" != "$MERGED" ]; then
+  export JAIPH_WORKSPACE="$RUNTIME_WORKSPACE"
+  if [ -n "\${JAIPH_AGENT_TRUSTED_WORKSPACE:-}" ]; then
+    export JAIPH_AGENT_TRUSTED_WORKSPACE="$(rewrite_workspace_path "$JAIPH_AGENT_TRUSTED_WORKSPACE")"
+  fi
+  rewritten_args=()
+  for arg in "$@"; do
+    rewritten_args+=("$(rewrite_workspace_path "$arg")")
+  done
+  cd "$RUNTIME_WORKSPACE"
+  exec "\${rewritten_args[@]}"
+fi
+
+cd "$MERGED"
 exec "$@"
 `;
 
@@ -481,10 +541,10 @@ export function overlayMountPath(containerPath: string): string {
  *  1. workspace → /jaiph/workspace-ro:ro  (overlay lower layer / copy source)
  *  2. sandboxRunDir → /jaiph/run:rw       (single run artifacts)
  *
- * The image already contains a writable `/jaiph/workspace` directory.
- * `overlay-run.sh` mounts `fuse-overlayfs` there when available; otherwise it
- * copies the lower layer into that directory as a writable fallback. `/jaiph/run`
- * is outside the overlay, so run artifacts still persist to the host mount.
+ * The image already contains a `/jaiph/workspace` directory used as the overlay
+ * merge target. When overlay mounts are unavailable, `overlay-run.sh` falls back
+ * to a writable per-run workspace copy under `/tmp`. `/jaiph/run` is outside the
+ * overlay, so run artifacts still persist to the host mount.
  *
  * The container runs `jaiph run --raw <file>` using its own installed jaiph.
  */
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index b1e5d97e..cd996a42 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -17,7 +17,7 @@ import {
   plainMultilineOrchestrationForRuntime,
   tripleQuotedRawForRuntime,
 } from "../orchestration-text";
-import { CONTAINER_WORKSPACE, exportWorkspacePatch } from "../docker";
+import { CONTAINER_RUN_DIR, exportWorkspacePatch } from "../docker";
 
 const MAX_EMBED = 1024 * 1024;
 const MAX_RECURSION_DEPTH = 256;
@@ -454,7 +454,7 @@ export class NodeWorkflowRuntime {
   /** Best-effort: export workspace changes as a patch file for Docker runs. */
   private exportPatchIfDocker(): void {
     const ws = this.env.JAIPH_WORKSPACE;
-    if (ws !== CONTAINER_WORKSPACE) return;
+    if (!ws || this.env.JAIPH_RUNS_DIR !== CONTAINER_RUN_DIR) return;
     exportWorkspacePatch(ws, join(this.runDir, "workspace.patch"));
   }
 

From 477967c380ecbeb7f9d83837059a594a38ae56ac Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 19:06:34 +0200
Subject: [PATCH 17/38] Docs: Record debug cruft cleanup in changelog, queue,
 and contributing guide
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The actual filesystem cleanup (deleting 22+ leftover debug directories,
removing tracked cruft files safe_name and QUEUE.md.tmp.4951, and adding
.gitignore patterns for docker-*/, nested-*/, overlay-*/, local-*/,
.tmp*/, QUEUE.md.tmp.*) was committed earlier. This commit records the
bookkeeping side:

- CHANGELOG.md: add entry describing the cleanup and disposition of
  safe_name, lib/, and run/ (all deleted — no live consumers found).
- QUEUE.md: remove the completed task from the queue.
- docs/contributing.md: add "Workspace hygiene" section documenting the
  .gitignore patterns and how to override them with git add -f.

Disposition of investigated paths:
  - safe_name: deleted (tracked file, no live consumers)
  - lib/: deleted (empty top-level directory, no live consumers)
  - run/: deleted (empty top-level directory, no live consumers)

No code changes; documentation and queue bookkeeping only.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md         |  1 +
 QUEUE.md             | 34 ----------------------------------
 docs/contributing.md | 15 +++++++++++++++
 3 files changed, 16 insertions(+), 34 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed61cc2c..79600ef2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Cleanup — Repo:** Delete top-level debug cruft and harden `.gitignore` — Removed 22+ leftover debug directories at the repo root (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`, and similar) from an abandoned per-call isolation experiment. Also deleted stale tracked files: `safe_name`, `QUEUE.md.tmp.4951`, and empty top-level `lib/` and `run/` directories — none had live consumers in the source tree. Added `.gitignore` patterns (`docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`, `QUEUE.md.tmp.*`) under a `# debug / temp directories (never commit)` section so these cannot return without a deliberate `git add -f` override. No code changes; filesystem hygiene only.
 - **Test — E2E/Runtime:** PTY-based TTY test for `run async` progress rendering — New E2E test (`e2e/tests/131_tty_async_progress.sh`) exercises the live progress tree rendering path for `run async` workflows under a real PTY. The test spawns `jaiph run` with a workflow that fans out two concurrent async branches (`run async branch_a()`, `run async branch_b()`), each emitting deterministic progress events over time (log + script steps with sleep). A Python `pty.openpty()` harness captures the raw PTY stream and asserts: (1) each branch's progress events appear under the correct subscript-numbered node (₁, ₂) in the tree, (2) the final frame shows both branches completed with their resolved `Handle<T>` return values (`result-a`, `result-b`), (3) no orphaned ANSI escape sequences survive after CSI stripping, and (4) a `RUNNING` frame was observed during live rendering. The test uses only deterministic steps (no `prompt claude` or external dependencies) and `assert_contains` checks with order-insensitive matching to tolerate async interleaving. This closes a regression-coverage gap — the existing `81_tty_progress_tree.sh` covers synchronous workflows but not the async handle/deferred-resolution render path. Docs updated (`docs/testing.md`, `docs/spec-async-handles.md`).
 - **Feature — Runtime/Library:** Artifacts — runtime mount and `artifacts.jh` library for publishing files out of the sandbox — Workflows can now publish files from inside the Docker sandbox (or host workspace) to a host-readable location at `.jaiph/runs/<run_id>/artifacts/`. The feature is split across two layers. **Runtime layer:** the `NodeWorkflowRuntime` creates the `artifacts/` subdirectory under the run directory before the first workflow step and exposes its path via `JAIPH_ARTIFACTS_DIR` (resolves to `/jaiph/run/artifacts` inside the Docker sandbox, `<host_run_dir>/artifacts` on the host). The existing `/jaiph/run` mount in Docker mode already maps this directory to the host — no new mount is needed. **Library layer:** a new built-in library `.jaiph/libs/jaiphlang/artifacts.jh` (paired with `artifacts.sh`) provides three `export workflow` entries: `save(local_path, name)` copies a file into the artifacts directory; `save_patch(name)` runs `git diff` (excluding `.jaiph/`) and writes the patch; `apply_patch(path)` applies a patch via `git apply`. The library mirrors the existing `queue.jh` / `queue.py` pattern — `import script "./artifacts.sh" as artifacts` with dispatch by subcommand. The `.jaiph/` exclusion in `save_patch` prevents clobbering runtime state when a patch is applied. `JAIPH_ARTIFACTS_DIR` is cleaned from inherited env in `resolveRuntimeEnv` to prevent leaking across nested runs. Runtime unit tests verify `JAIPH_ARTIFACTS_DIR` is set, writable, and exists before workflow execution. E2E test (`129_artifacts_lib.sh`) exercises `save`, `save_patch`, `apply_patch`, clean-workspace patch, and invalid-patch failure. Implementation: `node-workflow-runtime.ts` (artifacts dir creation, env var), `env.ts` (env cleanup), `.jaiph/libs/jaiphlang/artifacts.jh` and `artifacts.sh` (library). Docs updated (`docs/libraries.md`, `docs/artifacts.md`, `docs/configuration.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `Handle<T>` value model for `run async` — `run async ref(args)` now returns a first-class `Handle<T>` value instead of being a fire-and-forget statement. `T` is the same return type the function would have under a synchronous `run`. Capture is supported: `const h = run async ref()`. The handle resolves to the eventual return value on first non-passthrough read (string interpolation, passing as argument to `run`, comparison, conditional branching, match subject, channel send). Passthrough operations (initial capture into `const`, re-assignment) do not force resolution. Once resolved, the handle is replaced in-place by the resolved string value; subsequent reads return the cached value. Workflow exit implicitly joins all remaining unresolved handles created in that scope — this is not an error and preserves backward compatibility. `recover` composition works with `run async`: `run async foo() recover(err) { … }` — the async branch retries using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). `catch` also works for single-shot recovery. The parser accepts `recover(err) { … }` and `catch(err) { … }` after `run async ref(args)` (the previous attempt silently rejected this with a "trailing content" error). There is no fire-and-forget mode — every `run async` creates a handle tracked by the runtime. No explicit `await` keyword — resolution is implicit on first read or at workflow exit. The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword. Implementation: `Handle<T>` registry in `NodeWorkflowRuntime` (`createHandle`, `resolveHandleResult`, `resolveHandleVar`, `resolveHandlesInInput`), `async` flag on `run_capture` const RHS in `src/types.ts`, async capture parsing in `src/parse/const-rhs.ts`, `recover`/`catch` parsing for `run async` in `src/parse/workflows.ts`, formatter round-trip in `src/format/emit.ts`. Spec: `docs/spec-async-handles.md`. Parser, formatter, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`, `docs/index.html`).
diff --git a/QUEUE.md b/QUEUE.md
index 2f82205a..56fbc4e1 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,40 +13,6 @@ Process rules:
 
 ***
 
-## Cleanup — delete top-level debug cruft and harden `.gitignore` #dev-ready
-
-**Goal**
-The repo root contains 22+ leftover debug directories from an abandoned per-call isolated experiment (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`), plus stale `.tmp`, `.tmp-build`, `.tmp-debug`, `.tmp_run_debug`, `QUEUE.md.tmp.4951`, `safe_name`, top-level `lib/`, top-level `run/`. None are in `.gitignore`. Fix that, in one pass, so the workspace is readable at a glance and these don't return.
-
-**Scope**
-
-* Delete every leftover debug directory at the repo root matching `docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`. Verify with `git ls-files <pattern>` first that they are not tracked (they should not be).
-* Investigate three suspicious top-level paths: `safe_name`, `lib/`, `run/`. The default disposition is **delete**. Only keep one if you can identify a live consumer in the source tree (search with `rg`/`grep` for the path string). If a consumer exists, document it inline next to the deletion decision.
-* Delete tracked cruft files: `safe_name` and `QUEUE.md.tmp.4951`. Verify they are tracked first (`git ls-files`); use `git rm` rather than `rm` for tracked paths.
-* Add patterns to `.gitignore` so they cannot return without a deliberate override:
-  - `docker-*/`
-  - `nested-*/`
-  - `overlay-*/`
-  - `local-*/`
-  - `.tmp*/`
-  - `QUEUE.md.tmp.*`
-* Sanity-check: after the cleanup, `ls` at the repo root should show only documented project directories. No `.cidfile`, no `.pid`, no random temp dir names.
-
-**Non-goals**
-
-* Do not touch `.jaiph/runs/`, `dist/`, `node_modules/` — already in `.gitignore` and load-bearing.
-* Do not delete the `docker/` directory (singular, no suffix) — that is a different, intentional location.
-* No code changes; this task is filesystem hygiene only.
-
-**Acceptance criteria**
-
-* Repo root listing contains zero `docker-*`, `nested-*`, `overlay-*`, `local-*`, or `.tmp*` directories after the change.
-* `.gitignore` contains the patterns listed above; `git status` is clean immediately after deletion.
-* Disposition of `safe_name`, `lib/`, `run/` is recorded in the commit message (deleted, kept-and-why).
-* A second `npm run build && npm test` after the cleanup passes (proves nothing important was removed).
-
-***
-
 ## Cleanup — remove dead per-call-isolated leftovers from `src/runtime/docker.ts` #dev-ready
 
 **Goal**
diff --git a/docs/contributing.md b/docs/contributing.md
index ccf9e89a..f0fd6a43 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -60,6 +60,21 @@ For day-to-day work on the compiler and CLI you usually stay inside the clone: i
 
 Run a single Node test file after a build with e.g. `node --test dist/src/parse/parse-core.test.js`. The `dist/` paths mirror the source layout under `src/`.
 
+## Workspace hygiene
+
+The root `.gitignore` blocks common debug and temp directory patterns so they never reach version control:
+
+| Pattern | Purpose |
+|---------|---------|
+| `docker-*/` | Leftover Docker debug/experiment directories |
+| `nested-*/` | Nested-run debug directories |
+| `overlay-*/` | Overlay/fuse debug directories |
+| `local-*/` | Local debug directories |
+| `.tmp*/` | Temp build/debug directories |
+| `QUEUE.md.tmp.*` | Stale queue temp files |
+
+If you create throwaway directories during development, use one of these prefixes so they are automatically ignored. To track a file that matches a blocked pattern, use `git add -f`.
+
 ## Code philosophy
 
 Jaiph's codebase is maintained by both humans and AI agents. Code should be easy to read, navigate, and modify for both — which means the same thing: straightforward, flat, and explicit.

From 0a25ba13d2a6864429ceadfaf6df0a0c9bdba7aa Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 19:16:10 +0200
Subject: [PATCH 18/38] Cleanup: Remove dead per-call-isolated leftovers from
 Docker runtime

Delete exportWorkspacePatch and findRunArtifacts from src/runtime/docker.ts,
exportPatchIfDocker from node-workflow-runtime.ts, and the findRunArtifacts
call in src/cli/commands/run.ts. These functions served the abandoned per-call
isolated keyword and are fully replaced by the artifacts.jh library
(artifacts.save_patch() for workspace patches, JAIPH_ARTIFACTS_DIR for
artifact discovery). Also removes ~150 LoC of dead tests in docker.test.ts
and updates docs (sandboxing.md, architecture.md, artifacts.md) to reflect
the removal.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                |   5 +-
 QUEUE.md                                    |  46 --------
 docs/architecture.md                        |   2 +-
 docs/artifacts.md                           |   4 +-
 docs/sandboxing.md                          |   2 +-
 src/cli/commands/run.ts                     |   7 +-
 src/runtime/docker.test.ts                  | 112 --------------------
 src/runtime/docker.ts                       |  63 +----------
 src/runtime/kernel/node-workflow-runtime.ts |   9 --
 9 files changed, 8 insertions(+), 242 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79600ef2..406bc5ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,12 +2,13 @@
 
 ## All changes
 
+- **Cleanup — Runtime:** Remove dead per-call-isolated leftovers from Docker runtime — Deleted `exportWorkspacePatch` and `findRunArtifacts` from `src/runtime/docker.ts`, `exportPatchIfDocker` from `src/runtime/kernel/node-workflow-runtime.ts`, the `findRunArtifacts` call in `src/cli/commands/run.ts`, and ~150 LoC of dead tests in `src/runtime/docker.test.ts`. These functions were written for the abandoned per-call `isolated` keyword and have been fully replaced by the `artifacts.jh` library (`artifacts.save_patch()` for workspace patches, `JAIPH_ARTIFACTS_DIR` for artifact discovery). The automatic `workspace.patch` export during Docker teardown is removed — workflows that need a patch now request one explicitly via the artifacts library. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Cleanup — Repo:** Delete top-level debug cruft and harden `.gitignore` — Removed 22+ leftover debug directories at the repo root (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`, and similar) from an abandoned per-call isolation experiment. Also deleted stale tracked files: `safe_name`, `QUEUE.md.tmp.4951`, and empty top-level `lib/` and `run/` directories — none had live consumers in the source tree. Added `.gitignore` patterns (`docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`, `QUEUE.md.tmp.*`) under a `# debug / temp directories (never commit)` section so these cannot return without a deliberate `git add -f` override. No code changes; filesystem hygiene only.
 - **Test — E2E/Runtime:** PTY-based TTY test for `run async` progress rendering — New E2E test (`e2e/tests/131_tty_async_progress.sh`) exercises the live progress tree rendering path for `run async` workflows under a real PTY. The test spawns `jaiph run` with a workflow that fans out two concurrent async branches (`run async branch_a()`, `run async branch_b()`), each emitting deterministic progress events over time (log + script steps with sleep). A Python `pty.openpty()` harness captures the raw PTY stream and asserts: (1) each branch's progress events appear under the correct subscript-numbered node (₁, ₂) in the tree, (2) the final frame shows both branches completed with their resolved `Handle<T>` return values (`result-a`, `result-b`), (3) no orphaned ANSI escape sequences survive after CSI stripping, and (4) a `RUNNING` frame was observed during live rendering. The test uses only deterministic steps (no `prompt claude` or external dependencies) and `assert_contains` checks with order-insensitive matching to tolerate async interleaving. This closes a regression-coverage gap — the existing `81_tty_progress_tree.sh` covers synchronous workflows but not the async handle/deferred-resolution render path. Docs updated (`docs/testing.md`, `docs/spec-async-handles.md`).
 - **Feature — Runtime/Library:** Artifacts — runtime mount and `artifacts.jh` library for publishing files out of the sandbox — Workflows can now publish files from inside the Docker sandbox (or host workspace) to a host-readable location at `.jaiph/runs/<run_id>/artifacts/`. The feature is split across two layers. **Runtime layer:** the `NodeWorkflowRuntime` creates the `artifacts/` subdirectory under the run directory before the first workflow step and exposes its path via `JAIPH_ARTIFACTS_DIR` (resolves to `/jaiph/run/artifacts` inside the Docker sandbox, `<host_run_dir>/artifacts` on the host). The existing `/jaiph/run` mount in Docker mode already maps this directory to the host — no new mount is needed. **Library layer:** a new built-in library `.jaiph/libs/jaiphlang/artifacts.jh` (paired with `artifacts.sh`) provides three `export workflow` entries: `save(local_path, name)` copies a file into the artifacts directory; `save_patch(name)` runs `git diff` (excluding `.jaiph/`) and writes the patch; `apply_patch(path)` applies a patch via `git apply`. The library mirrors the existing `queue.jh` / `queue.py` pattern — `import script "./artifacts.sh" as artifacts` with dispatch by subcommand. The `.jaiph/` exclusion in `save_patch` prevents clobbering runtime state when a patch is applied. `JAIPH_ARTIFACTS_DIR` is cleaned from inherited env in `resolveRuntimeEnv` to prevent leaking across nested runs. Runtime unit tests verify `JAIPH_ARTIFACTS_DIR` is set, writable, and exists before workflow execution. E2E test (`129_artifacts_lib.sh`) exercises `save`, `save_patch`, `apply_patch`, clean-workspace patch, and invalid-patch failure. Implementation: `node-workflow-runtime.ts` (artifacts dir creation, env var), `env.ts` (env cleanup), `.jaiph/libs/jaiphlang/artifacts.jh` and `artifacts.sh` (library). Docs updated (`docs/libraries.md`, `docs/artifacts.md`, `docs/configuration.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `Handle<T>` value model for `run async` — `run async ref(args)` now returns a first-class `Handle<T>` value instead of being a fire-and-forget statement. `T` is the same return type the function would have under a synchronous `run`. Capture is supported: `const h = run async ref()`. The handle resolves to the eventual return value on first non-passthrough read (string interpolation, passing as argument to `run`, comparison, conditional branching, match subject, channel send). Passthrough operations (initial capture into `const`, re-assignment) do not force resolution. Once resolved, the handle is replaced in-place by the resolved string value; subsequent reads return the cached value. Workflow exit implicitly joins all remaining unresolved handles created in that scope — this is not an error and preserves backward compatibility. `recover` composition works with `run async`: `run async foo() recover(err) { … }` — the async branch retries using the same retry-limit semantics as non-async `recover` (default 10, configurable via `run.recover_limit`). `catch` also works for single-shot recovery. The parser accepts `recover(err) { … }` and `catch(err) { … }` after `run async ref(args)` (the previous attempt silently rejected this with a "trailing content" error). There is no fire-and-forget mode — every `run async` creates a handle tracked by the runtime. No explicit `await` keyword — resolution is implicit on first read or at workflow exit. The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword. Implementation: `Handle<T>` registry in `NodeWorkflowRuntime` (`createHandle`, `resolveHandleResult`, `resolveHandleVar`, `resolveHandlesInInput`), `async` flag on `run_capture` const RHS in `src/types.ts`, async capture parsing in `src/parse/const-rhs.ts`, `recover`/`catch` parsing for `run async` in `src/parse/workflows.ts`, formatter round-trip in `src/format/emit.ts`. Spec: `docs/spec-async-handles.md`. Parser, formatter, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`, `docs/index.html`).
 - **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`).
-- **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
+- **Feature — Docker:** Workspace immutability contract — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected. *(The automatic `workspace.patch` teardown export originally shipped here has been superseded by `artifacts.save_patch()` and removed — see the cleanup entry above.)* Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`).
 - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`).
 - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`).
@@ -28,7 +29,7 @@
 - **Breaking — Runtime:** Remove `JAIPH_LIB` — The Node runtime no longer sets `JAIPH_LIB`, and isolated script subprocesses no longer receive it (`run-step-exec.ts`). `resolveRuntimeEnv` still deletes inherited `JAIPH_LIB` so a parent shell cannot inject a stale path. Workflows that used `source "$JAIPH_LIB/…"` must use `JAIPH_WORKSPACE`-relative paths, `import script`, or inline bash. Project-scoped **`.jaiph/libs/`** (`jaiph install`) is unchanged.
 - **Docs / E2E:** Documentation and tests no longer describe or assert `JAIPH_LIB` / `.jaiph/lib` (singular).
 - **Feature — Runtime:** Heartbeat file in run directory — The runtime now writes a `heartbeat` file (containing epoch-ms timestamp) to the run directory (`.jaiph/runs/<date>/<time>-<source>/heartbeat`) immediately on construction and refreshes it every 10 seconds. External tooling can `stat()` or read this file to detect whether a Jaiph process is still alive; a stale heartbeat (>~20s) means the process is dead. The timer is `.unref()`ed so it never keeps the Node process alive past its natural exit. Implementation: `startHeartbeat()` / `stopHeartbeat()` in `NodeWorkflowRuntime`. Unit test added.
-- **Fix — Docker:** Generic runtime image bootstrap and host run-dir mapping — Docker no longer assumes the selected image already contains `jaiph`, but it also no longer relies on a host-mounted `dist/` tree. When the selected base image lacks `jaiph`, Jaiph now builds a thin derived image from that base and installs the current local package with `npm install -g`, then runs `jaiph run --raw` there. Docker-backed runs now mount the resolved host runs root directly at `/jaiph/run`, so the default `.jaiph/runs`, relative `JAIPH_RUNS_DIR`, and absolute in-workspace `JAIPH_RUNS_DIR` all persist artifacts in the expected host location; absolute paths outside the workspace fail with `E_DOCKER_RUNS_DIR`. Implementation: `resolveImage()`, `resolveDockerHostRunsRoot()`, and `findRunArtifacts()` in `src/runtime/docker.ts`; `spawnExec()` in `src/cli/commands/run.ts`. Unit and E2E coverage updated.
+- **Fix — Docker:** Generic runtime image bootstrap and host run-dir mapping — Docker no longer assumes the selected image already contains `jaiph`, but it also no longer relies on a host-mounted `dist/` tree. When the selected base image lacks `jaiph`, Jaiph now builds a thin derived image from that base and installs the current local package with `npm install -g`, then runs `jaiph run --raw` there. Docker-backed runs now mount the resolved host runs root directly at `/jaiph/run`, so the default `.jaiph/runs`, relative `JAIPH_RUNS_DIR`, and absolute in-workspace `JAIPH_RUNS_DIR` all persist artifacts in the expected host location; absolute paths outside the workspace fail with `E_DOCKER_RUNS_DIR`. Implementation: `resolveImage()` and `resolveDockerHostRunsRoot()` in `src/runtime/docker.ts`; `spawnExec()` in `src/cli/commands/run.ts`. Unit and E2E coverage updated. *(Note: `findRunArtifacts()` originally shipped here has been removed — superseded by `JAIPH_ARTIFACTS_DIR`.)*
 
 # 0.9.1
 
diff --git a/QUEUE.md b/QUEUE.md
index 56fbc4e1..fc79bdc6 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,52 +13,6 @@ Process rules:
 
 ***
 
-## Cleanup — remove dead per-call-isolated leftovers from `src/runtime/docker.ts` #dev-ready
-
-**Goal**
-`src/runtime/docker.ts` (688 LoC) still exports four functions written exclusively for the now-abandoned per-call `isolated` keyword: `exportWorkspacePatch`, `findRunArtifacts`, plus the helper `exportPatchIfDocker` in `src/runtime/kernel/node-workflow-runtime.ts`. These have one or two live callers each, all of which are themselves transitional code from the same abandoned design. Once the new `artifacts.jh` lib has landed (it replaces the use case end-to-end), these can go. Net reduction: ~200 LoC of source + ~150 LoC of dead tests in `src/runtime/docker.test.ts`.
-
-**Context (read before starting)**
-
-* `exportWorkspacePatch(workspaceDir, outputPath)` writes a `git diff` patch when running inside the Docker sandbox. Single live caller: `NodeWorkflowRuntime.exportPatchIfDocker()` (in `src/runtime/kernel/node-workflow-runtime.ts`), which writes `<runDir>/workspace.patch` at workflow end. The new `artifacts.save_patch()` workflow in `.jaiph/libs/jaiphlang/artifacts.jh` (shipped by the artifacts task) replaces this use case explicitly: callers who want a patch ask for one by name, with the path returned to them.
-* `findRunArtifacts(sandboxRunDir)` discovers the latest run dir under a Docker-mounted artifacts area. Single live caller: `src/cli/commands/run.ts:367` — the host reads it after the sandbox exits to surface the inner run's artifacts. With the artifacts task's explicit `JAIPH_ARTIFACTS_DIR` mount and known path, this discovery is no longer needed: the host already knows where to look.
-* The `isolated` keyword is not part of this codebase. There is no per-call isolation primitive to keep these helpers alive for.
-
-**Scope**
-
-* **Precondition check**: before deleting, run `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` and verify the only callers are the ones listed above. If any new caller has appeared, evaluate it on the spot — either it is also dead and can go in this task, or removal is blocked and you stop and report.
-* **Precondition check**: confirm the artifacts task has shipped (look for `.jaiph/libs/jaiphlang/artifacts.jh` and a working `artifacts.save_patch`). If it has not, this task is not ready — do not attempt half-removal that breaks the runtime.
-* Remove from `src/runtime/docker.ts`:
-  - `exportWorkspacePatch` (function + export)
-  - `findRunArtifacts` (function + export)
-* Remove from `src/runtime/kernel/node-workflow-runtime.ts`:
-  - `exportPatchIfDocker` (private method)
-  - The import of `exportWorkspacePatch` from `../docker`
-  - Any call site of `exportPatchIfDocker` (verify zero remain after the method is gone)
-* Remove from `src/cli/commands/run.ts`:
-  - The `findRunArtifacts(sandboxRunDir)` call at line ~367
-  - The import of `findRunArtifacts`
-  - Any code that consumes the result of `findRunArtifacts` and is now dead (chase the value, do not leave dangling variables)
-* Remove from `src/runtime/docker.test.ts`:
-  - All `findRunArtifacts: ...` test cases
-  - All `exportWorkspacePatch: ...` test cases
-  - The shared test fixtures used only by those tests
-
-**Non-goals**
-
-* Do not touch `writeOverlayScript`, `overlayMountPath`, `buildDockerArgs`, or other docker.ts functions — those remain load-bearing for the whole-program Docker sandbox.
-* Do not modify the artifacts lib or its runtime mount; this task only removes the predecessor primitives.
-* Do not collapse env vars or config keys — that is a separate concern explicitly out of scope.
-
-**Acceptance criteria**
-
-* `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` returns zero matches.
-* `npm run build` succeeds with no TypeScript errors after removal.
-* `npm test` passes (proves no remaining test depends on the deleted primitives).
-* Net diff: ~200 LoC removed from `src/runtime/docker.ts` and `src/runtime/kernel/node-workflow-runtime.ts`, ~150 LoC of dead tests removed from `src/runtime/docker.test.ts`. If your diff is materially smaller, you missed something; if materially larger, you are deleting more than the task scope — stop and reassess.
-
-***
-
 ## Cleanup — consolidate the 5-way test directory split #dev-ready
 
 **Goal**
diff --git a/docs/architecture.md b/docs/architecture.md
index b96d1bb9..bfd4a8ea 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -57,7 +57,7 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`*
 
 - **Docker runtime helper (`src/runtime/docker.ts`)**
   - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The default image is the official `ghcr.io/jaiphlang/jaiph-runtime` GHCR image; every selected image must already contain `jaiph` (no auto-install or derived-image build at runtime). The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits.
-  - **Workspace immutability:** Docker runs cannot modify the host workspace. The host checkout is mounted read-only; `/jaiph/workspace` is a sandbox-local copy-on-write overlay discarded on exit. The only host-writable path is `/jaiph/run` (run artifacts). During teardown, `exportWorkspacePatch()` emits a `workspace.patch` file (best-effort `git diff --binary`) into the run directory so sandbox edits can be reviewed or applied on the host. See [Sandboxing](sandboxing.md) for the full contract.
+  - **Workspace immutability:** Docker runs cannot modify the host workspace. The host checkout is mounted read-only; `/jaiph/workspace` is a sandbox-local copy-on-write overlay discarded on exit. The only host-writable path is `/jaiph/run` (run artifacts). Workflows that need to capture workspace changes should use the `artifacts.save_patch()` library function, which writes a named patch into the artifacts directory. See [Sandboxing](sandboxing.md) for the full contract and [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox) for the patch workflow.
 
 ## Runtime vs CLI responsibilities
 
diff --git a/docs/artifacts.md b/docs/artifacts.md
index 241e0593..78f3a704 100644
--- a/docs/artifacts.md
+++ b/docs/artifacts.md
@@ -23,7 +23,6 @@ The runtime uses a UTC-dated hierarchy. Each run gets its own folder: date, then
       inbox/                           # inbox message files (when channels are used)
       .seq                             # step-sequence counter (kernel/seq-alloc.ts)
       run_summary.jsonl                # durable event timeline
-      workspace.patch                  # (Docker only) git diff of sandbox workspace changes
 ```
 
 Sequence prefixes are **monotonic and unique** per run (allocated in the kernel), so artifact names sort in execution order. For how this fits into the CLI and kernel, see [Architecture — Durable artifact layout](architecture.md#durable-artifact-layout).
@@ -34,8 +33,7 @@ Sequence prefixes are **monotonic and unique** per run (allocated in the kernel)
 - **`run_summary.jsonl`** — Append-only JSONL timeline: workflow boundaries, step start/end, structured log lines, inbox-related events. Useful for tooling and post-run analysis.
 - **`inbox/`** — When you use channels, message payloads can be reflected as files under the run for inspection (see [Inbox & Dispatch](inbox.md)).
 - **`.seq`** — Internal counter backing the numeric prefixes; you normally do not edit it.
-- **`artifacts/`** — User-published files created by the `jaiphlang/artifacts` library. The runtime creates this directory before the first workflow step runs and exposes its path via `JAIPH_ARTIFACTS_DIR`. Files written here survive sandbox teardown because the directory is on the host filesystem (mapped via the existing `/jaiph/run` mount in Docker mode). See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox).
-- **`workspace.patch`** — (Docker runs only) A `git diff --binary` patch capturing all workspace modifications made during the run. Generated automatically during runtime teardown when Docker sandboxing is enabled and the workspace has changes. The patch is sufficient to review or `git apply` on the host. Omitted when there are no workspace changes. See [Sandboxing — Workspace patch export](sandboxing.md#runtime-behavior).
+- **`artifacts/`** — User-published files created by the `jaiphlang/artifacts` library. The runtime creates this directory before the first workflow step runs and exposes its path via `JAIPH_ARTIFACTS_DIR`. Files written here survive sandbox teardown because the directory is on the host filesystem (mapped via the existing `/jaiph/run` mount in Docker mode). Workflows that need to capture workspace changes should call `artifacts.save_patch(name)`, which writes a `git diff` patch here. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox).
 
 ## Keeping runs out of git
 
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 620e3896..19c5a1b9 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -145,7 +145,7 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 **Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. The host checkout is bind-mounted read-only; the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer (fuse-overlayfs or copy fallback) whose state is discarded on container exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract.
 
-**Workspace patch export** -- When a Docker-backed run modifies files under `/jaiph/workspace`, the runtime automatically exports a `workspace.patch` file into the run directory during teardown (`exportWorkspacePatch` in `docker.ts`, called from `NodeWorkflowRuntime`). The patch is generated with `git diff --binary` (after `git add -N .` for untracked files) and is sufficient to review or `git apply` on the host. Patch export is best-effort: it runs regardless of workflow exit status, and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, the `workspace.patch` file is omitted (not created). The bundled `.jaiph/Dockerfile` image includes `git`.
+**Workspace patch export** -- The runtime no longer automatically exports a `workspace.patch` during teardown. Workflows that need to capture workspace changes should use the `artifacts.save_patch(name)` library function from `jaiphlang/artifacts`, which writes a named `git diff` patch into the `artifacts/` subdirectory. This is explicit and composable — callers choose when and what to capture, and the patch lands in the standard artifacts directory alongside other published files. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). The bundled `.jaiph/Dockerfile` image includes `git`.
 
 **Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment.
 
diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts
index 1a87ceab..c1ab63ee 100644
--- a/src/cli/commands/run.ts
+++ b/src/cli/commands/run.ts
@@ -30,7 +30,6 @@ import {
   resolveDockerConfig,
   spawnDockerProcess,
   cleanupDocker,
-  findRunArtifacts,
   resolveDockerHostRunsRoot,
 } from "../../runtime/docker";
 import {
@@ -363,11 +362,7 @@ function reportResult(
   let runDir: string | undefined;
   let summaryFile: string | undefined;
 
-  if (sandboxRunDir) {
-    const artifacts = findRunArtifacts(sandboxRunDir);
-    runDir = artifacts.runDir;
-    summaryFile = artifacts.summaryFile;
-  } else if (existsSync(metaFile)) {
+  if (existsSync(metaFile)) {
     const metaLines = readFileSync(metaFile, "utf8").split(/\r?\n/);
     for (const line of metaLines) {
       if (line.startsWith("run_dir=")) {
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index c6c7ef36..20f00cb8 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -9,7 +9,6 @@ import {
   buildDockerArgs,
   remapDockerEnv,
   overlayMountPath,
-  findRunArtifacts,
   resolveDockerHostRunsRoot,
   writeOverlayScript,
   resolveImage,
@@ -18,7 +17,6 @@ import {
   isEnvDenied,
   ENV_DENYLIST_PREFIXES,
   GHCR_IMAGE_REPO,
-  exportWorkspacePatch,
   type MountSpec,
   type DockerRunConfig,
   type DockerSpawnOptions,
@@ -447,58 +445,6 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
   }
 });
 
-// ---------------------------------------------------------------------------
-// findRunArtifacts
-// ---------------------------------------------------------------------------
-
-test("findRunArtifacts: discovers run dir and summary file", () => {
-  const tmp = mkdtempSync(join(tmpdir(), "jaiph-test-find-"));
-  try {
-    const runDir = join(tmp, "2026-04-17", "09-30-00-test.jh");
-    mkdirSync(runDir, { recursive: true });
-    writeFileSync(join(runDir, "run_summary.jsonl"), "{}");
-    const result = findRunArtifacts(tmp);
-    assert.equal(result.runDir, runDir);
-    assert.equal(result.summaryFile, join(runDir, "run_summary.jsonl"));
-  } finally {
-    rmSync(tmp, { recursive: true, force: true });
-  }
-});
-
-test("findRunArtifacts: returns runDir without summary if missing", () => {
-  const tmp = mkdtempSync(join(tmpdir(), "jaiph-test-find-"));
-  try {
-    const runDir = join(tmp, "2026-04-17", "09-30-00-test.jh");
-    mkdirSync(runDir, { recursive: true });
-    const result = findRunArtifacts(tmp);
-    assert.equal(result.runDir, runDir);
-    assert.equal(result.summaryFile, undefined);
-  } finally {
-    rmSync(tmp, { recursive: true, force: true });
-  }
-});
-
-test("findRunArtifacts: returns empty for non-existent dir", () => {
-  const result = findRunArtifacts("/tmp/jaiph-nonexistent-" + Date.now());
-  assert.equal(result.runDir, undefined);
-  assert.equal(result.summaryFile, undefined);
-});
-
-test("findRunArtifacts: returns latest run when multiple exist", () => {
-  const tmp = mkdtempSync(join(tmpdir(), "jaiph-test-find-"));
-  try {
-    const older = join(tmp, "2026-04-17", "09-30-00-test.jh");
-    const newer = join(tmp, "2026-04-17", "09-31-00-test.jh");
-    mkdirSync(older, { recursive: true });
-    mkdirSync(newer, { recursive: true });
-    writeFileSync(join(newer, "run_summary.jsonl"), "{}");
-    const result = findRunArtifacts(tmp);
-    assert.equal(result.runDir, newer);
-    assert.equal(result.summaryFile, join(newer, "run_summary.jsonl"));
-  } finally {
-    rmSync(tmp, { recursive: true, force: true });
-  }
-});
 
 // ---------------------------------------------------------------------------
 // spawnDockerProcess: stdin must be ignored
@@ -700,61 +646,3 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
 
-// ---------------------------------------------------------------------------
-// exportWorkspacePatch
-// ---------------------------------------------------------------------------
-
-test("exportWorkspacePatch writes patch when git repo has changes", () => {
-  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
-  const patchOut = join(dir, "workspace.patch");
-  try {
-    const { execSync } = require("node:child_process");
-    execSync("git init", { cwd: dir, stdio: "ignore" });
-    execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" });
-    execSync("git config user.name test", { cwd: dir, stdio: "ignore" });
-    // Create initial commit so diff has a baseline
-    writeFileSync(join(dir, "initial.txt"), "initial\n");
-    execSync("git add . && git commit -m init", { cwd: dir, stdio: "ignore" });
-    // Make a change
-    writeFileSync(join(dir, "new-file.txt"), "hello\n");
-
-    const result = exportWorkspacePatch(dir, patchOut);
-    assert.equal(result, true, "should return true when patch is non-empty");
-    assert.ok(existsSync(patchOut), "patch file should exist");
-    const content = readFileSync(patchOut, "utf8");
-    assert.ok(content.includes("new-file.txt"), "patch should reference the new file");
-  } finally {
-    rmSync(dir, { recursive: true, force: true });
-  }
-});
-
-test("exportWorkspacePatch returns false and omits file when no changes", () => {
-  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
-  const patchOut = join(dir, "workspace.patch");
-  try {
-    const { execSync } = require("node:child_process");
-    execSync("git init", { cwd: dir, stdio: "ignore" });
-    execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" });
-    execSync("git config user.name test", { cwd: dir, stdio: "ignore" });
-    writeFileSync(join(dir, "initial.txt"), "initial\n");
-    execSync("git add . && git commit -m init", { cwd: dir, stdio: "ignore" });
-
-    const result = exportWorkspacePatch(dir, patchOut);
-    assert.equal(result, false, "should return false when no changes");
-    assert.ok(!existsSync(patchOut), "patch file should not exist");
-  } finally {
-    rmSync(dir, { recursive: true, force: true });
-  }
-});
-
-test("exportWorkspacePatch returns false for non-git directory", () => {
-  const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-"));
-  const patchOut = join(dir, "workspace.patch");
-  try {
-    const result = exportWorkspacePatch(dir, patchOut);
-    assert.equal(result, false, "should return false for non-git dir");
-    assert.ok(!existsSync(patchOut), "patch file should not exist");
-  } finally {
-    rmSync(dir, { recursive: true, force: true });
-  }
-});
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 69b16997..9911f279 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -1,5 +1,5 @@
 import { execFileSync, execSync, spawn, ChildProcess } from "node:child_process";
-import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs";
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join, resolve, dirname, relative } from "node:path";
 import type { RuntimeConfig } from "../types";
@@ -684,65 +684,4 @@ export function cleanupDocker(result: DockerSpawnResult): void {
   }
 }
 
-// ---------------------------------------------------------------------------
-// Workspace patch export (Docker teardown)
-// ---------------------------------------------------------------------------
-
-/**
- * Export a git diff of workspace changes to a patch file.
- * Used during Docker run teardown to capture sandbox-local modifications.
- *
- * Contract:
- * - When there are changes, writes `workspace.patch` (git apply-able).
- * - When there are no changes, the file is omitted (not created).
- * - Best-effort: failures are reported on stderr but do not affect workflow exit status.
- *
- * @returns true if a non-empty patch was written.
- */
-export function exportWorkspacePatch(workspaceDir: string, outputPath: string): boolean {
-  try {
-    // Stage intent-to-add for untracked files so they appear in git diff
-    execSync("git add -N .", { cwd: workspaceDir, stdio: "ignore", timeout: 30_000 });
-  } catch {
-    // Not a git repo or no new files — continue to diff
-  }
-  try {
-    const diff = execSync("git diff --binary", {
-      cwd: workspaceDir,
-      timeout: 60_000,
-      maxBuffer: 50 * 1024 * 1024,
-    });
-    if (!diff || diff.length === 0) return false;
-    writeFileSync(outputPath, diff);
-    return true;
-  } catch (err: unknown) {
-    const msg = err instanceof Error ? err.message : String(err);
-    process.stderr.write(`jaiph docker: workspace patch export failed: ${msg}\n`);
-    return false;
-  }
-}
-
-export function findRunArtifacts(
-  sandboxRunDir: string,
-): { runDir?: string; summaryFile?: string } {
-  if (!existsSync(sandboxRunDir)) return {};
-  const candidates: string[] = [];
-  for (const dateDir of readdirSync(sandboxRunDir)) {
-    const datePath = join(sandboxRunDir, dateDir);
-    if (!statSync(datePath).isDirectory()) continue;
-    for (const runEntry of readdirSync(datePath)) {
-      const runPath = join(datePath, runEntry);
-      if (!statSync(runPath).isDirectory()) continue;
-      candidates.push(runPath);
-    }
-  }
-  candidates.sort();
-  const runDir = candidates[candidates.length - 1];
-  if (!runDir) return {};
-  const summaryFile = join(runDir, "run_summary.jsonl");
-  return {
-    runDir,
-    summaryFile: existsSync(summaryFile) ? summaryFile : undefined,
-  };
-}
 
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index cd996a42..00389d65 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -17,7 +17,6 @@ import {
   plainMultilineOrchestrationForRuntime,
   tripleQuotedRawForRuntime,
 } from "../orchestration-text";
-import { CONTAINER_RUN_DIR, exportWorkspacePatch } from "../docker";
 
 const MAX_EMBED = 1024 * 1024;
 const MAX_RECURSION_DEPTH = 256;
@@ -451,13 +450,6 @@ export class NodeWorkflowRuntime {
     }
   }
 
-  /** Best-effort: export workspace changes as a patch file for Docker runs. */
-  private exportPatchIfDocker(): void {
-    const ws = this.env.JAIPH_WORKSPACE;
-    if (!ws || this.env.JAIPH_RUNS_DIR !== CONTAINER_RUN_DIR) return;
-    exportWorkspacePatch(ws, join(this.runDir, "workspace.patch"));
-  }
-
   async runDefault(args: string[]): Promise<number> {
     this.emitWorkflow("WORKFLOW_START", "default");
     const rootScope: Scope = {
@@ -481,7 +473,6 @@ export class NodeWorkflowRuntime {
     });
     const result = await this.executeWorkflow(resolved.filePath, resolved.workflow.name, rootScope, args, false);
     this.emitWorkflow("WORKFLOW_END", "default");
-    this.exportPatchIfDocker();
     this.stopHeartbeat();
     return result.status;
   }

From 1d642be5b9e9e31060320fb625e2f6a1c18c8da4 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 21:29:46 +0200
Subject: [PATCH 19/38] Fix: Exclude .jaiph/runs from docker copy; TTY
 stderr_line via writeTTYLine

When fuse overlay is unavailable, rsync and the cp fallback no longer copy
.jaiph/runs; emit a clear stderr line before the temp workspace copy.

In TTY mode, stderr_line events use writeTTYLine so lines show immediately
without clearing/redrawing the running status line.

Add QUEUE item for agent_inbox workflow quoting noise.

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 QUEUE.md                           | 60 ++++++++++++++++++++++++++++++
 src/cli/run/stderr-handler.test.ts | 30 +++++++++++++++
 src/cli/run/stderr-handler.ts      | 11 ++----
 src/runtime/docker.test.ts         |  9 ++++-
 src/runtime/docker.ts              | 32 +++++++++++++++-
 5 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/QUEUE.md b/QUEUE.md
index fc79bdc6..684d81ca 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,6 +13,66 @@ Process rules:
 
 ***
 
+## Bug — double quotes in workflow step / log output (agent_inbox) #dev-ready
+
+**Goal**
+When running `CI=true ./examples/agent_inbox.jh`, step titles and reviewer lines show broken or noisy quoting: e.g. `message="\"Found 3 issues in auth module\""`, and `! Critical issue: "Summary: "Found 3 issues in auth module""`. Messages that contain double quotes should render readably (single consistent escaping or structured display), not as nested `\"` soup or ambiguous `""` pairs.
+
+**Sample — before / after**
+
+*Before* (current; excerpt from `CI=true ./examples/agent_inbox.jh`):
+
+```
+Jaiph: Running agent_inbox.jh
+
+workflow default
+  ▸ workflow scanner
+  ·   ℹ Scanning for issues...
+  ✓ workflow scanner (0s)
+  ▸ workflow analyst (message="\"Found 3 issues in auth module\"", chan="findings", sender="scanner")
+  ·   ℹ Analyzing message from scanner on channel findings...
+  ✓ workflow analyst (0s)
+  ▸ workflow reviewer (message="\"Summary: \"Found 3 issues in aut...", chan="report", sender="analyst")
+  ·   ℹ Reviewing message from analyst on channel report...
+  ·   ! Critical issue: "Summary: "Found 3 issues in auth module""
+  ✓ workflow reviewer (0s)
+
+✓ PASS workflow default (0.1s)
+```
+
+*After* (target: human-readable; exact formatting is up to implementation—key is no `\"` noise and no ambiguous nested `"` around the same payload):
+
+```
+Jaiph: Running agent_inbox.jh
+
+workflow default
+  ▸ workflow scanner
+  ·   ℹ Scanning for issues...
+  ✓ workflow scanner (0s)
+  ▸ workflow analyst (message="Found 3 issues in auth module", chan="findings", sender="scanner")
+  ·   ℹ Analyzing message from scanner on channel findings...
+  ✓ workflow analyst (0s)
+  ▸ workflow reviewer (message="Summary: Found 3 issues in auth module…", chan="report", sender="analyst")
+  ·   ℹ Reviewing message from analyst on channel report...
+  ·   ! Critical issue: Summary: Found 3 issues in auth module
+  ✓ workflow reviewer (0s)
+
+✓ PASS workflow default (0.1s)
+```
+
+**Context (read before starting)**
+
+* Reproduce: `CI=true ./examples/agent_inbox.jh` from repo root; inspect stderr lines for `workflow analyst`, `workflow reviewer`, and any `! Critical issue:` line.
+* Likely surfaces: interpolation of workflow args into step labels, or string formatting when echoing channel payloads; may involve `stderr-handler`, docker/run summary, or workflow event text builders.
+* Fix should not change the example’s semantics—only how quoted strings are encoded for human-readable TTY output.
+
+**Acceptance criteria**
+
+* `CI=true ./examples/agent_inbox.jh` output shows no `\"` escape sequences inside displayed `message=...` fragments unless intentionally documenting raw JSON; reviewer/critical lines do not contain ambiguous nested `"` pairs for this fixture.
+* A regression test exists (unit or golden on formatted line) that fails if a payload like `Found 3 issues in "auth" module` is rendered with broken quoting.
+
+***
+
 ## Cleanup — consolidate the 5-way test directory split #dev-ready
 
 **Goal**
diff --git a/src/cli/run/stderr-handler.test.ts b/src/cli/run/stderr-handler.test.ts
index c17e8f93..019f10de 100644
--- a/src/cli/run/stderr-handler.test.ts
+++ b/src/cli/run/stderr-handler.test.ts
@@ -53,3 +53,33 @@ test("registerTTYSubscriber: STEP_END fallback indent uses event depth", () => {
   const output = writes.join("");
   assert.match(output, /^  ·   ✓ prompt prompt \(1s\)\n$/);
 });
+
+test("registerTTYSubscriber: stderr_line renders immediately in TTY mode", () => {
+  const emitter = createRunEmitter();
+  const ctx: TTYContext = {
+    isTTY: true,
+    colorEnabled: false,
+    startedAt: Date.now(),
+    runningInterval: undefined,
+    nonTTYHeartbeatInterval: undefined,
+    nonTTYHeartbeatStep: null,
+  };
+  const writes: string[] = [];
+  const originalWrite = process.stdout.write.bind(process.stdout);
+  (process.stdout.write as unknown as (chunk: string) => boolean) = ((chunk: string | Uint8Array) => {
+    writes.push(typeof chunk === "string" ? chunk : Buffer.from(chunk).toString("utf8"));
+    return true;
+  }) as unknown as typeof process.stdout.write;
+
+  try {
+    registerTTYSubscriber(emitter, ctx);
+    emitter.emit("stderr_line", {
+      line: "jaiph docker: workspace overlay unavailable; copying workspace into a temp directory before startup",
+    });
+  } finally {
+    (process.stdout.write as unknown as typeof process.stdout.write) = originalWrite as typeof process.stdout.write;
+  }
+
+  const output = writes.join("");
+  assert.equal(output, "jaiph docker: workspace overlay unavailable; copying workspace into a temp directory before startup\n");
+});
diff --git a/src/cli/run/stderr-handler.ts b/src/cli/run/stderr-handler.ts
index 5f7272b5..8a979ec6 100644
--- a/src/cli/run/stderr-handler.ts
+++ b/src/cli/run/stderr-handler.ts
@@ -266,15 +266,10 @@ export function registerTTYSubscriber(emitter: RunEmitter, ctx: TTYContext): voi
   });
 
   emitter.on("stderr_line", (data) => {
-    if (ctx.isTTY && ctx.runningInterval !== undefined) {
-      process.stdout.write("\r\u001b[K\u001b[1A\r\u001b[K");
-    }
-    if (!ctx.isTTY) {
+    if (ctx.isTTY) {
+      writeTTYLine(data.line, ctx, "single");
+    } else {
       process.stderr.write(`${data.line}\n`);
     }
-    if (ctx.isTTY && ctx.runningInterval !== undefined) {
-      const elapsedSec = (Date.now() - ctx.startedAt) / 1000;
-      process.stdout.write(formatRunningBottomLine("default", elapsedSec));
-    }
   });
 }
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 20f00cb8..c7984216 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -434,10 +434,17 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     assert.ok(content.startsWith("#!/usr/bin/env bash"));
     assert.ok(content.includes("fuse-overlayfs"));
     assert.ok(content.includes("workspace overlay unavailable"));
+    assert.ok(content.includes("copying workspace into a temp directory before startup"));
     assert.ok(content.includes("using copy fallback"));
+    assert.ok(content.includes("live output begins after the copy completes"));
+    assert.ok(content.includes("excludes .jaiph/runs"));
     assert.ok(content.includes("mktemp -d /tmp/jaiph-workspace."));
     assert.ok(content.includes("rewrite_workspace_path()"));
-    assert.ok(content.includes('rsync -a --delete --no-owner --no-group'));
+    assert.ok(content.includes("--exclude='.jaiph/runs'"));
+    assert.ok(content.includes('rsync -a --delete --exclude=\'.jaiph/runs\' --no-owner --no-group'));
+    assert.ok(content.includes("case \"$entry\" in"));
+    assert.ok(content.includes(".|..|.jaiph) continue ;;"));
+    assert.ok(content.includes(".|..|runs) continue ;;"));
     assert.ok(content.includes("mktemp \"$MERGED/.jaiph-overlay-probe.XXXXXX\""));
     assert.ok(content.includes('exec "${rewritten_args[@]}"'));
   } finally {
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 9911f279..f13a38c8 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -339,12 +339,35 @@ rewrite_workspace_path() {
 
 copy_workspace_with_rsync() {
   local target="$1"
-  rsync -a --delete --no-owner --no-group --chmod=Du+rwx,Dgo+rx,Fu+rw,Fgo+r "$LOWER"/ "$target"/
+  rsync -a --delete --exclude='.jaiph/runs' --no-owner --no-group --chmod=Du+rwx,Dgo+rx,Fu+rw,Fgo+r "$LOWER"/ "$target"/
 }
 
 copy_workspace_with_cp() {
   local target="$1"
-  cp -a --no-preserve=ownership "$LOWER"/. "$target"/
+  mkdir -p "$target"
+  (
+    cd "$LOWER"
+    shopt -s dotglob nullglob
+    for entry in * .*; do
+      case "$entry" in
+        .|..|.jaiph) continue ;;
+      esac
+      cp -a --no-preserve=ownership "$entry" "$target"/
+    done
+    if [ -d ".jaiph" ]; then
+      mkdir -p "$target/.jaiph"
+      (
+        cd ".jaiph"
+        shopt -s dotglob nullglob
+        for entry in * .*; do
+          case "$entry" in
+            .|..|runs) continue ;;
+          esac
+          cp -a --no-preserve=ownership "$entry" "$target/.jaiph"/
+        done
+      )
+    fi
+  )
   chmod -R u+rwX "$target" 2>/dev/null || true
 }
 
@@ -366,6 +389,11 @@ else
   overlay_reason="fuse-overlayfs unavailable or /dev/fuse missing"
 fi
 if [ "$overlay_ok" -ne 1 ]; then
+  printf 'jaiph docker: workspace overlay unavailable; copying workspace into a temp directory before startup (live output begins after the copy completes; excludes .jaiph/runs)' >&2
+  if [ -n "$overlay_reason" ]; then
+    printf ' (%s)' "$overlay_reason" >&2
+  fi
+  printf '\n' >&2
   tmp_workspace="$(mktemp -d /tmp/jaiph-workspace.XXXXXX 2>/dev/null || true)"
   if [ -n "$tmp_workspace" ]; then
     if command -v rsync >/dev/null 2>&1; then

From 5eb9d4ce6a99743b5da360e360516fac132b2c5c Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 21:49:37 +0200
Subject: [PATCH 20/38] Fix: Remove double-quote escaping noise from step
 titles and log output

Strip backslash-escaped quotes from display formatting so workflow step
labels and log lines render human-readably. Three layers changed:
formatNamedParamsForDisplay and formatParamsForDisplay no longer escape
inner double quotes with backslash (the surrounding key="value"
delimiters are structural, not shell-safe); formatStartLine in display.ts
applies the same change for prompt previews; and node-workflow-runtime
strips outer quotes from interpolated channel-send payloads via
stripOuterQuotes so messages flow through dispatch without literal quote
wrappers. Regression tests added; E2E golden output updated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  1 +
 QUEUE.md                                      | 60 -------------------
 e2e/tests/110_examples.sh                     |  6 +-
 .../commands/format-params-display.test.ts    | 19 ++++++
 src/cli/commands/format-params.ts             |  4 +-
 src/cli/run/display.test.ts                   |  5 +-
 src/cli/run/display.ts                        |  2 +-
 src/runtime/kernel/node-workflow-runtime.ts   |  2 +-
 8 files changed, 30 insertions(+), 69 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 406bc5ac..34078a3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## All changes
 
+- **Fix — CLI/Runtime:** Clean up double-quote rendering in step titles and log output — Parameter values displayed in the progress tree (e.g. `message="Found 3 issues in auth module"`) no longer produce backslash-escaped `\"` sequences or ambiguous nested `""` pairs. The fix touches three layers: `formatNamedParamsForDisplay` and `formatParamsForDisplay` in `src/cli/commands/format-params.ts` no longer escape inner double quotes with backslash (the surrounding `key="value"` delimiters are structural, not shell-safe); `formatStartLine` in `src/cli/run/display.ts` applies the same change for prompt previews; and `node-workflow-runtime.ts` strips outer quotes from interpolated channel-send payloads via `stripOuterQuotes` so messages like `"Found 3 issues"` are stored as `Found 3 issues` rather than carrying literal quote wrappers through dispatch. Regression tests added: `format-params-display.test.ts` asserts no `\"` in formatted output for payloads containing inner quotes; `display.test.ts` verifies prompt previews pass through quotes without escaping. E2E golden output for `agent_inbox.jh` updated to match.
 - **Cleanup — Runtime:** Remove dead per-call-isolated leftovers from Docker runtime — Deleted `exportWorkspacePatch` and `findRunArtifacts` from `src/runtime/docker.ts`, `exportPatchIfDocker` from `src/runtime/kernel/node-workflow-runtime.ts`, the `findRunArtifacts` call in `src/cli/commands/run.ts`, and ~150 LoC of dead tests in `src/runtime/docker.test.ts`. These functions were written for the abandoned per-call `isolated` keyword and have been fully replaced by the `artifacts.jh` library (`artifacts.save_patch()` for workspace patches, `JAIPH_ARTIFACTS_DIR` for artifact discovery). The automatic `workspace.patch` export during Docker teardown is removed — workflows that need a patch now request one explicitly via the artifacts library. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`).
 - **Cleanup — Repo:** Delete top-level debug cruft and harden `.gitignore` — Removed 22+ leftover debug directories at the repo root (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`, and similar) from an abandoned per-call isolation experiment. Also deleted stale tracked files: `safe_name`, `QUEUE.md.tmp.4951`, and empty top-level `lib/` and `run/` directories — none had live consumers in the source tree. Added `.gitignore` patterns (`docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`, `QUEUE.md.tmp.*`) under a `# debug / temp directories (never commit)` section so these cannot return without a deliberate `git add -f` override. No code changes; filesystem hygiene only.
 - **Test — E2E/Runtime:** PTY-based TTY test for `run async` progress rendering — New E2E test (`e2e/tests/131_tty_async_progress.sh`) exercises the live progress tree rendering path for `run async` workflows under a real PTY. The test spawns `jaiph run` with a workflow that fans out two concurrent async branches (`run async branch_a()`, `run async branch_b()`), each emitting deterministic progress events over time (log + script steps with sleep). A Python `pty.openpty()` harness captures the raw PTY stream and asserts: (1) each branch's progress events appear under the correct subscript-numbered node (₁, ₂) in the tree, (2) the final frame shows both branches completed with their resolved `Handle<T>` return values (`result-a`, `result-b`), (3) no orphaned ANSI escape sequences survive after CSI stripping, and (4) a `RUNNING` frame was observed during live rendering. The test uses only deterministic steps (no `prompt claude` or external dependencies) and `assert_contains` checks with order-insensitive matching to tolerate async interleaving. This closes a regression-coverage gap — the existing `81_tty_progress_tree.sh` covers synchronous workflows but not the async handle/deferred-resolution render path. Docs updated (`docs/testing.md`, `docs/spec-async-handles.md`).
diff --git a/QUEUE.md b/QUEUE.md
index 684d81ca..fc79bdc6 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,66 +13,6 @@ Process rules:
 
 ***
 
-## Bug — double quotes in workflow step / log output (agent_inbox) #dev-ready
-
-**Goal**
-When running `CI=true ./examples/agent_inbox.jh`, step titles and reviewer lines show broken or noisy quoting: e.g. `message="\"Found 3 issues in auth module\""`, and `! Critical issue: "Summary: "Found 3 issues in auth module""`. Messages that contain double quotes should render readably (single consistent escaping or structured display), not as nested `\"` soup or ambiguous `""` pairs.
-
-**Sample — before / after**
-
-*Before* (current; excerpt from `CI=true ./examples/agent_inbox.jh`):
-
-```
-Jaiph: Running agent_inbox.jh
-
-workflow default
-  ▸ workflow scanner
-  ·   ℹ Scanning for issues...
-  ✓ workflow scanner (0s)
-  ▸ workflow analyst (message="\"Found 3 issues in auth module\"", chan="findings", sender="scanner")
-  ·   ℹ Analyzing message from scanner on channel findings...
-  ✓ workflow analyst (0s)
-  ▸ workflow reviewer (message="\"Summary: \"Found 3 issues in aut...", chan="report", sender="analyst")
-  ·   ℹ Reviewing message from analyst on channel report...
-  ·   ! Critical issue: "Summary: "Found 3 issues in auth module""
-  ✓ workflow reviewer (0s)
-
-✓ PASS workflow default (0.1s)
-```
-
-*After* (target: human-readable; exact formatting is up to implementation—key is no `\"` noise and no ambiguous nested `"` around the same payload):
-
-```
-Jaiph: Running agent_inbox.jh
-
-workflow default
-  ▸ workflow scanner
-  ·   ℹ Scanning for issues...
-  ✓ workflow scanner (0s)
-  ▸ workflow analyst (message="Found 3 issues in auth module", chan="findings", sender="scanner")
-  ·   ℹ Analyzing message from scanner on channel findings...
-  ✓ workflow analyst (0s)
-  ▸ workflow reviewer (message="Summary: Found 3 issues in auth module…", chan="report", sender="analyst")
-  ·   ℹ Reviewing message from analyst on channel report...
-  ·   ! Critical issue: Summary: Found 3 issues in auth module
-  ✓ workflow reviewer (0s)
-
-✓ PASS workflow default (0.1s)
-```
-
-**Context (read before starting)**
-
-* Reproduce: `CI=true ./examples/agent_inbox.jh` from repo root; inspect stderr lines for `workflow analyst`, `workflow reviewer`, and any `! Critical issue:` line.
-* Likely surfaces: interpolation of workflow args into step labels, or string formatting when echoing channel payloads; may involve `stderr-handler`, docker/run summary, or workflow event text builders.
-* Fix should not change the example’s semantics—only how quoted strings are encoded for human-readable TTY output.
-
-**Acceptance criteria**
-
-* `CI=true ./examples/agent_inbox.jh` output shows no `\"` escape sequences inside displayed `message=...` fragments unless intentionally documenting raw JSON; reviewer/critical lines do not contain ambiguous nested `"` pairs for this fixture.
-* A regression test exists (unit or golden on formatted line) that fails if a payload like `Found 3 issues in "auth" module` is rendered with broken quoting.
-
-***
-
 ## Cleanup — consolidate the 5-way test directory split #dev-ready
 
 **Goal**
diff --git a/e2e/tests/110_examples.sh b/e2e/tests/110_examples.sh
index 373bd965..9c6075b8 100755
--- a/e2e/tests/110_examples.sh
+++ b/e2e/tests/110_examples.sh
@@ -56,12 +56,12 @@ workflow default
   ▸ workflow scanner
   ·   ℹ Scanning for issues...
   ✓ workflow scanner (<time>)
-  ▸ workflow analyst (message="\"Found 3 issues in auth module\"", chan="findings", sender="scanner")
+  ▸ workflow analyst (message="Found 3 issues in auth module", chan="findings", sender="scanner")
   ·   ℹ Analyzing message from scanner on channel findings...
   ✓ workflow analyst (<time>)
-  ▸ workflow reviewer (message="\"Summary: \"Found 3 issues in aut...", chan="report", sender="analyst")
+  ▸ workflow reviewer (message="Summary: Found 3 issues in auth ...", chan="report", sender="analyst")
   ·   ℹ Reviewing message from analyst on channel report...
-  ·   ! Critical issue: "Summary: "Found 3 issues in auth module""
+  ·   ! Critical issue: Summary: Found 3 issues in auth module
   ✓ workflow reviewer (<time>)
 ✓ PASS workflow default (<time>)
 EOF
diff --git a/src/cli/commands/format-params-display.test.ts b/src/cli/commands/format-params-display.test.ts
index 064b9d19..c5b4beb9 100644
--- a/src/cli/commands/format-params-display.test.ts
+++ b/src/cli/commands/format-params-display.test.ts
@@ -125,6 +125,25 @@ test("buildStepDisplayParamPairs uses declared names when arity matches", () =>
   ]);
 });
 
+test("formatNamedParamsForDisplay does not produce backslash-quote escaping", () => {
+  const params: Array<[string, string]> = [
+    ["message", 'Found 3 issues in "auth" module'],
+  ];
+  const result = formatNamedParamsForDisplay(params);
+  assert.ok(!result.includes('\\"'), "no backslash-quote escaping in display output");
+  assert.ok(result.includes('"auth"'), "inner quotes pass through for readability");
+  assert.equal(result, ' (message="Found 3 issues in "auth" module")');
+});
+
+test("formatParamsForDisplay does not produce backslash-quote escaping", () => {
+  const params: Array<[string, string]> = [
+    ["message", 'Found 3 issues in "auth" module'],
+  ];
+  const result = formatParamsForDisplay(params);
+  assert.ok(!result.includes('\\"'), "no backslash-quote escaping in display output");
+  assert.ok(result.includes('"auth"'), "inner quotes pass through for readability");
+});
+
 test("buildStepDisplayParamPairs falls back to numeric or argN positional keys", () => {
   assert.deepEqual(buildStepDisplayParamPairs(["x"], undefined, { positionalStyle: "numeric" }), [["1", "x"]]);
   assert.deepEqual(buildStepDisplayParamPairs(["x"], [], { positionalStyle: "numeric" }), [["1", "x"]]);
diff --git a/src/cli/commands/format-params.ts b/src/cli/commands/format-params.ts
index 937d6562..9699c1a8 100644
--- a/src/cli/commands/format-params.ts
+++ b/src/cli/commands/format-params.ts
@@ -66,7 +66,7 @@ export function formatNamedParamsForDisplay(params: Array<[string, string]>, opt
     const normalized = normalizeParamValue(v);
     const visible =
       normalized.length > MAX_PARAM_VALUE_DISPLAY ? `${normalized.slice(0, MAX_PARAM_VALUE_DISPLAY)}...` : normalized;
-    const escaped = visible.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+    const escaped = visible.replace(/\\/g, "\\\\");
     const isPositional = /^arg\d+$/.test(k) || /^[1-9]\d*$/.test(k);
     const key = allPositional && isPositional ? String(positionalSeq++) : displayKey(k);
     return `${key}="${escaped}"`;
@@ -91,7 +91,7 @@ export function formatParamsForDisplay(params: Array<[string, string]>, options?
     const visible =
       normalized.length > MAX_PARAM_VALUE_DISPLAY ? `${normalized.slice(0, MAX_PARAM_VALUE_DISPLAY)}...` : normalized;
     const needsQuotes = /[\s,]/.test(visible) || visible.includes('"');
-    const escaped = visible.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+    const escaped = visible.replace(/\\/g, "\\\\");
     return needsQuotes ? `"${escaped}"` : visible;
   });
   let result = ` (${parts.join(", ")})`;
diff --git a/src/cli/run/display.test.ts b/src/cli/run/display.test.ts
index cded0ba1..187f74ab 100644
--- a/src/cli/run/display.test.ts
+++ b/src/cli/run/display.test.ts
@@ -228,12 +228,13 @@ test("formatStartLine: prompt preview escapes backslashes", () => {
   assert.ok(result.includes("\\\\"), "backslashes should be escaped");
 });
 
-test("formatStartLine: prompt preview escapes double quotes", () => {
+test("formatStartLine: prompt preview passes through double quotes", () => {
   const params: Array<[string, string]> = [
     ["prompt_text", 'say "hello"'],
   ];
   const result = formatStartLine("  ", "prompt", "prompt", false, params);
-  assert.ok(result.includes('\\"hello\\"'), "quotes should be escaped");
+  assert.ok(result.includes('"hello"'), "quotes should pass through");
+  assert.ok(!result.includes('\\"'), "no backslash-quote escaping");
 });
 
 test("formatStartLine: prompt preview escapes backslash before quote", () => {
diff --git a/src/cli/run/display.ts b/src/cli/run/display.ts
index 2c3d3c45..7f7af22a 100644
--- a/src/cli/run/display.ts
+++ b/src/cli/run/display.ts
@@ -49,7 +49,7 @@ export function formatStartLine(
       oneLine.length > PROMPT_PREVIEW_MAX
         ? `${oneLine.slice(0, PROMPT_PREVIEW_MAX)}...`
         : oneLine;
-    const escaped = previewDisplay.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+    const escaped = previewDisplay.replace(/\\/g, "\\\\");
     const backendPart = name !== kind ? ` ${name}` : "";
     namePart = previewDisplay.length > 0 ? `${kindLabel}${backendPart} "${escaped}"` : `${kindLabel}${backendPart}`;
     const restParams = params.filter(([, v]) => !isInternalParamValue(v));
diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts
index 00389d65..a3723196 100644
--- a/src/runtime/kernel/node-workflow-runtime.ts
+++ b/src/runtime/kernel/node-workflow-runtime.ts
@@ -949,7 +949,7 @@ export class NodeWorkflowRuntime {
             step.rhs.tripleQuoted ? tripleQuotedRawForRuntime(step.rhs.token) : step.rhs.token;
           const sendIr = await this.interpolateWithCaptures(sendTok, scope);
           if (!sendIr.ok) return this.mergeStepResult(accOut, accErr, sendIr.result);
-          payload = sendIr.value;
+          payload = stripOuterQuotes(sendIr.value);
         } else if (step.rhs.kind === "var") {
           const sendHandleErr = await this.resolveHandlesInInput(scope, step.rhs.bash);
           if (sendHandleErr) return this.mergeStepResult(accOut, accErr, sendHandleErr);

From eaec3b76b9c1b6c439c448d384a109caad057812 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 22:05:11 +0200
Subject: [PATCH 21/38] Docker: Publish runtime from .jaiph/Dockerfile; pull
 GHCR on run

- CI: build/push ghcr.io/jaiphlang/jaiph-runtime from .jaiph/Dockerfile on
  nightly branch (:nightly) and version tags (:<semver>, :latest); pass
  JAIPH_REPO_REF for install ref.
- Runtime: resolveImage always uses configured/default image with pull +
  jaiph check; stop auto docker build of workspace .jaiph/Dockerfile on
  jaiph run (keep runtime.docker_image / JAIPH_DOCKER_*).
- Docs and E2E aligned; unit test contract updated for resolveImage.

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 .github/workflows/docker-publish.yml        | 36 ++++---------
 .jaiph/Dockerfile                           |  4 +-
 docs/cli.md                                 |  2 +-
 docs/configuration.md                       |  2 +-
 docs/sandboxing.md                          | 17 +++---
 e2e/tests/73_docker_dockerfile_detection.sh | 57 ++++++++-------------
 src/runtime/docker.test.ts                  | 46 +++--------------
 src/runtime/docker.ts                       | 31 ++++-------
 8 files changed, 60 insertions(+), 135 deletions(-)

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 29b1c82f..50e9543d 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -21,25 +21,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Setup Node
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-          cache: npm
-
-      - name: Install dependencies and build
-        run: |
-          npm ci
-          npm run build
-
-      - name: Create npm tarball
-        run: npm pack --pack-destination docker/
-
-      - name: Rename tarball for Dockerfile
-        run: |
-          cd docker
-          mv jaiph-*.tgz jaiph.tgz
-
       - name: Log in to GHCR
         uses: docker/login-action@v3
         with:
@@ -47,26 +28,29 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Determine image tags
-        id: tags
+      - name: Image tags and jaiph ref
+        id: meta
         run: |
           if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
             VERSION="${GITHUB_REF_NAME#v}"
             echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${VERSION},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> "$GITHUB_OUTPUT"
+            echo "jaiph_ref=v${VERSION}" >> "$GITHUB_OUTPUT"
           else
             echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly" >> "$GITHUB_OUTPUT"
+            echo "jaiph_ref=nightly" >> "$GITHUB_OUTPUT"
           fi
 
       - name: Build and push
         uses: docker/build-push-action@v6
         with:
-          context: docker
-          file: docker/Dockerfile.runtime
+          context: .jaiph
+          file: .jaiph/Dockerfile
           push: true
-          tags: ${{ steps.tags.outputs.tags }}
-          build-args: JAIPH_TARBALL=jaiph.tgz
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            JAIPH_REPO_REF=${{ steps.meta.outputs.jaiph_ref }}
 
       - name: Verify pushed image contains jaiph
         run: |
-          TAG="$(echo '${{ steps.tags.outputs.tags }}' | cut -d',' -f1)"
+          TAG="$(echo '${{ steps.meta.outputs.tags }}' | cut -d',' -f1)"
           docker run --rm --entrypoint sh "${TAG}" -lc "command -v jaiph && jaiph --version"
diff --git a/.jaiph/Dockerfile b/.jaiph/Dockerfile
index 62d5531c..102d5b53 100644
--- a/.jaiph/Dockerfile
+++ b/.jaiph/Dockerfile
@@ -50,7 +50,7 @@ RUN mkdir -p "$HOME/.local/bin" && \
     rm -f /tmp/install-cursor-agent.sh
 
 # jaiph (official installer: https://jaiph.org/install)
-RUN curl -fsSL https://jaiph.org/install | bash
-RUN jaiph use nightly
+ARG JAIPH_REPO_REF=nightly
+RUN JAIPH_REPO_REF=${JAIPH_REPO_REF} curl -fsSL https://jaiph.org/install | bash
 
 WORKDIR /jaiph/workspace
diff --git a/docs/cli.md b/docs/cli.md
index a124946d..b7b07047 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -304,7 +304,7 @@ Creates:
 
 - `.jaiph/.gitignore` — lists `runs` and `tmp`. If the file already exists and does not match this exact list, `jaiph init` exits with a non-zero status.
 - `.jaiph/bootstrap.jh` — canonical bootstrap workflow; made executable. The template uses a triple-quoted multiline prompt body (`prompt """ ... """`) so the generated file parses and compiles as valid Jaiph. It also asks the agent to review/update `.jaiph/Dockerfile` for this repository and ends by logging a summary (`WHAT CHANGED` + `WHY`).
-- `.jaiph/Dockerfile` — Docker sandbox template that extends the official `ghcr.io/jaiphlang/jaiph-runtime:nightly` image with agent CLIs (Claude Code, cursor-agent). The base image already contains Node.js, jaiph, and `fuse-overlayfs`, so the generated Dockerfile only adds project-specific tooling. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note.
+- `.jaiph/Dockerfile` — Docker sandbox template that extends the official `ghcr.io/jaiphlang/jaiph-runtime:nightly` image with agent CLIs (Claude Code, cursor-agent). The base image already contains Node.js, jaiph, and `fuse-overlayfs`, so the generated Dockerfile only adds project-specific tooling. `jaiph run` does not build this file automatically; build or publish the image yourself and set `runtime.docker_image` / `JAIPH_DOCKER_IMAGE` if you need a custom image. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note.
 - `.jaiph/SKILL.md` — copied from the skill file bundled with your Jaiph installation (or from `JAIPH_SKILL_PATH` when set). If no skill file is found, this file is not written and a note is printed.
 
 ## `jaiph install`
diff --git a/docs/configuration.md b/docs/configuration.md
index f377c7fa..b6fec6b1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -172,7 +172,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r
 | Key | Type | Default | Env variable | Description |
 |-----|------|---------|--------------|-------------|
 | `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. |
-| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the official GHCR image matching the installed jaiph version. |
+| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, uses the official GHCR image tag matching the installed jaiph version. A workspace `.jaiph/Dockerfile` is not built automatically on `jaiph run` — build and push (or tag locally) your own image, then set this key or `JAIPH_DOCKER_IMAGE`. |
 | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. |
 | `runtime.workspace` | string[] | `[".:/jaiph/workspace:rw"]` | _(no env override)_ | Mount list. Only settable via in-file config or defaults. |
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 19c5a1b9..d458ed83 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -161,7 +161,7 @@ Docker-related errors use `E_DOCKER_*` codes for programmatic detection:
 |------------|---------|----------|
 | `E_DOCKER_NOT_FOUND` | `docker info` fails (Docker not installed or daemon not running) | Run exits immediately. No fallback to local execution. |
 | `E_DOCKER_PULL` | `docker pull` fails (network error, image not found, auth failure) | Run exits. Check registry access and image name. |
-| `E_DOCKER_BUILD` | `docker build` from `.jaiph/Dockerfile` fails | Run exits. Fix the Dockerfile and retry. |
+| `E_DOCKER_BUILD` | `docker build` fails when using helpers that build from a Dockerfile | Not used during normal `jaiph run` image resolution. |
 | `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. |
 | `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. |
 | `E_VALIDATE_MOUNT` | Mount targets a denied host path (`/`, `/proc`, docker socket, etc.) | Run exits before container launch. |
@@ -183,20 +183,15 @@ Jaiph publishes official runtime images to GHCR:
 | `ghcr.io/jaiphlang/jaiph-runtime:nightly` | `nightly` branch | Contributors and CI |
 | `ghcr.io/jaiphlang/jaiph-runtime:latest` | Latest release tag | Convenience alias |
 
-The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001). It does **not** include agent CLIs (Claude Code, cursor-agent) to keep the image small. To add agent CLIs, extend the official image or use a custom `.jaiph/Dockerfile` (see below).
+The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. Published tags (`:<semver>`, `:nightly`, `:latest`) are built from this repository’s `.jaiph/Dockerfile` (see CI). The image includes Node.js, jaiph, `fuse-overlayfs`, agent CLIs where the Dockerfile installs them, and a non-root `jaiph` user (UID 10001).
 
-### Dockerfile-based image detection
+### `.jaiph/Dockerfile` vs `jaiph run`
 
-The runtime considers the image explicitly configured when either `runtime.docker_image` appears in the file or `JAIPH_DOCKER_IMAGE` is set in the environment. In that case, `.jaiph/Dockerfile` is not consulted.
+`jaiph run` **always** uses the configured image (`runtime.docker_image`, `JAIPH_DOCKER_IMAGE`, or the default GHCR tag above). It **does not** run `docker build` against `.jaiph/Dockerfile` on each run. Use that file as the recipe to build and publish (or `docker build` + `docker tag`) an image, then point `runtime.docker_image` / `JAIPH_DOCKER_IMAGE` at it.
 
-When the image is not explicit:
+After the image is pulled or found locally, Jaiph verifies that `jaiph` is available inside the container. If the check fails, the run exits with `E_DOCKER_NO_JAIPH`.
 
-1. If `.jaiph/Dockerfile` exists in the workspace root, the runtime builds it, tags the result `jaiph-runtime:latest`, and uses that image. Build failure produces `E_DOCKER_BUILD`.
-2. Otherwise, the default image (`ghcr.io/jaiphlang/jaiph-runtime:<version>`) is pulled if needed.
-
-After resolving the image (whether from a Dockerfile build, an explicit image, or the default), Jaiph verifies that `jaiph` is available inside the container. If the check fails, the run exits with `E_DOCKER_NO_JAIPH`.
-
-The `jaiph init` scaffold generates a `.jaiph/Dockerfile` that extends the official runtime image with agent CLIs (Claude Code, cursor-agent). The Dockerfile does not need to copy any jaiph runtime files — `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
+The `jaiph init` scaffold generates a `.jaiph/Dockerfile` for project-specific sandbox tooling. `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
 
 ### Extending the official image
 
diff --git a/e2e/tests/73_docker_dockerfile_detection.sh b/e2e/tests/73_docker_dockerfile_detection.sh
index 286e9bd1..638d429b 100644
--- a/e2e/tests/73_docker_dockerfile_detection.sh
+++ b/e2e/tests/73_docker_dockerfile_detection.sh
@@ -23,52 +23,39 @@ if ! e2e::ensure_docker_test_image; then
   exit 0
 fi
 
-e2e::section "docker dockerfile detection — custom Dockerfile builds and runs"
+e2e::section "docker — invalid .jaiph/Dockerfile is not built on run"
 
-# Given: a .jaiph/Dockerfile that produces an image with jaiph AND a marker file.
-# We install jaiph from a local tarball so the custom image satisfies the strict contract.
+# Given: a syntactically invalid .jaiph/Dockerfile. If `jaiph run` tried to build it,
+# the run would fail with E_DOCKER_BUILD. The driver must use the default GHCR image instead.
 mkdir -p "${TEST_DIR}/.jaiph"
+printf '%s\n' 'THIS IS NOT A VALID DOCKERFILE' > "${TEST_DIR}/.jaiph/Dockerfile"
 
-(cd "${ROOT_DIR}" && npm pack --pack-destination "${TEST_DIR}/.jaiph" >/dev/null 2>&1)
-tarball_name="$(ls "${TEST_DIR}/.jaiph"/jaiph-*.tgz 2>/dev/null | head -1 | xargs basename)"
-
-cat > "${TEST_DIR}/.jaiph/Dockerfile" <<DOCKERFILE
-FROM node:20-bookworm-slim
-RUN apt-get update && apt-get install -y --no-install-recommends bash fuse-overlayfs fuse3 rsync && rm -rf /var/lib/apt/lists/*
-RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && chown -R jaiph:jaiph /jaiph
-COPY ${tarball_name} /tmp/jaiph.tgz
-RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz
-RUN touch /jaiph-runtime-marker
-USER jaiph
-ENV HOME=/home/jaiph
-ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
-WORKDIR /jaiph/workspace
-DOCKERFILE
-
-e2e::file "dockerfile_detect.jh" <<'EOF'
-script check_marker_impl = ```
-test -f /jaiph-runtime-marker && echo "marker found"
+e2e::file "dockerfile_ignored.jh" <<'EOF'
+script ping_impl = ```
+echo "pulled default image ok"
 ```
-rule check_marker() {
-  run check_marker_impl()
+rule ping() {
+  run ping_impl()
 }
 
 workflow default() {
-  ensure check_marker()
+  ensure ping()
 }
 EOF
 
-# When: run with Docker enabled and no explicit docker_image
-JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/dockerfile_detect.jh" >/dev/null 2>&1
+# When: Docker enabled, implicit default image (pull ghcr.io/jaiphlang/jaiph-runtime:<version>)
+if ! JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/dockerfile_ignored.jh" >/dev/null 2>&1; then
+  JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/dockerfile_ignored.jh" || true
+  e2e::fail "docker: run should use pulled default image, not build broken .jaiph/Dockerfile"
+fi
 
-# Then: the workflow should succeed (marker file present = custom image was used)
-run_dir="$(e2e::run_dir "dockerfile_detect.jh")"
-e2e::expect_run_file "dockerfile_detect.jh" "000003-script__check_marker_impl.out" "marker found"
-e2e::pass "docker: .jaiph/Dockerfile detected and image built"
+run_dir="$(e2e::run_dir "dockerfile_ignored.jh")"
+e2e::expect_run_file "dockerfile_ignored.jh" "000003-script__ping_impl.out" "pulled default image ok"
+e2e::pass "docker: broken .jaiph/Dockerfile is ignored; default runtime image is used"
 
-e2e::section "docker dockerfile detection — explicit image skips Dockerfile"
+e2e::section "docker — explicit image with present .jaiph/Dockerfile"
 
-# Given: same workspace with .jaiph/Dockerfile, but explicit image set
+# Given: same workspace with invalid .jaiph/Dockerfile, explicit image set
 e2e::file "dockerfile_skip.jh" <<'EOF'
 script check_no_marker_impl = ```
 if test -f /jaiph-runtime-marker; then
@@ -91,9 +78,9 @@ JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph ru
 
 # Then: the marker file should NOT exist (E2E test image, not custom build)
 e2e::expect_run_file "dockerfile_skip.jh" "000003-script__check_no_marker_impl.out" "no marker"
-e2e::pass "docker: explicit image skips .jaiph/Dockerfile"
+e2e::pass "docker: explicit image used; .jaiph/Dockerfile not built"
 
-e2e::section "docker dockerfile detection — fallback without Dockerfile uses configured image"
+e2e::section "docker — workspace without .jaiph/Dockerfile uses configured image"
 
 # Given: a separate test dir without .jaiph/Dockerfile, using the E2E test image
 fallback_dir="$(mktemp -d "${JAIPH_E2E_WORK_DIR}/docker_fallback.XXXXXX")"
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index c7984216..b36bd64e 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -11,8 +11,6 @@ import {
   overlayMountPath,
   resolveDockerHostRunsRoot,
   writeOverlayScript,
-  resolveImage,
-  buildImageFromDockerfile,
   verifyImageHasJaiph,
   isEnvDenied,
   ENV_DENYLIST_PREFIXES,
@@ -495,43 +493,7 @@ test("GHCR_IMAGE_REPO: points to official registry", () => {
 });
 
 // ---------------------------------------------------------------------------
-// resolveImage
-// ---------------------------------------------------------------------------
-
-test("resolveImage: uses Dockerfile when imageExplicit is false and Dockerfile exists", () => {
-  const tmpDir = mkdtempSync(join(tmpdir(), "jaiph-resolve-image-"));
-  try {
-    mkdirSync(join(tmpDir, ".jaiph"), { recursive: true });
-    writeFileSync(join(tmpDir, ".jaiph", "Dockerfile"), "FROM ubuntu:latest\n");
-    const dockerfilePath = join(tmpDir, ".jaiph", "Dockerfile");
-    assert.ok(existsSync(dockerfilePath));
-  } finally {
-    rmSync(tmpDir, { recursive: true, force: true });
-  }
-});
-
-test("resolveImage: skips Dockerfile when imageExplicit is true", () => {
-  const tmpDir = mkdtempSync(join(tmpdir(), "jaiph-resolve-image-"));
-  try {
-    mkdirSync(join(tmpDir, ".jaiph"), { recursive: true });
-    writeFileSync(join(tmpDir, ".jaiph", "Dockerfile"), "FROM ubuntu:latest\n");
-    const config: DockerRunConfig = {
-      enabled: true,
-      image: "custom:image",
-      imageExplicit: true,
-      network: "default",
-      timeout: 300,
-      mounts: [{ hostPath: ".", containerPath: "/jaiph/workspace", mode: "rw" }],
-    };
-    assert.ok(existsSync(join(tmpDir, ".jaiph", "Dockerfile")));
-    assert.equal(config.imageExplicit, true);
-  } finally {
-    rmSync(tmpDir, { recursive: true, force: true });
-  }
-});
-
-// ---------------------------------------------------------------------------
-// Strict contract: no auto-build, no npm pack bootstrap
+// Strict contract: no on-run workspace Dockerfile build, no npm pack bootstrap
 // ---------------------------------------------------------------------------
 
 test("docker.ts: no auto-build or npm-pack bootstrap code", () => {
@@ -541,6 +503,12 @@ test("docker.ts: no auto-build or npm-pack bootstrap code", () => {
   assert.ok(!src.includes("jaiph-runtime-auto"), "docker.ts must not reference auto-derived image tag");
   assert.ok(!src.includes("ensureLocalRuntimeImage"), "docker.ts must not contain ensureLocalRuntimeImage");
   assert.ok(!src.includes("buildRuntimeImageFromLocalPackage"), "docker.ts must not contain buildRuntimeImageFromLocalPackage");
+  assert.ok(
+    /export function resolveImage\(config: DockerRunConfig\): string \{[\s\S]*?pullImageIfNeeded\(image\);[\s\S]*?verifyImageHasJaiph\(image\);/.test(
+      src,
+    ),
+    "resolveImage must pull and verify config.image only (no workspace Dockerfile build)",
+  );
 });
 
 test("verifyImageHasJaiph: throws E_DOCKER_NO_JAIPH with guidance for missing jaiph", () => {
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index f13a38c8..0138fe08 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -1,5 +1,5 @@
 import { execFileSync, execSync, spawn, ChildProcess } from "node:child_process";
-import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join, resolve, dirname, relative } from "node:path";
 import type { RuntimeConfig } from "../types";
@@ -281,26 +281,17 @@ export function verifyImageHasJaiph(image: string): void {
 /**
  * Resolve the Docker image to use.
  *
- * When the image was not explicitly configured (`imageExplicit === false`),
- * checks for `.jaiph/Dockerfile` in the workspace root. If present, builds
- * from it and verifies jaiph is present. Otherwise uses the configured
- * (default) image — the official GHCR runtime image — and pulls if needed.
+ * Always uses `config.image` (from env, in-file `runtime.docker_image`, or the
+ * default `ghcr.io/jaiphlang/jaiph-runtime:<version>`). Pulls from the registry
+ * if the image is not present locally. Does not build from `.jaiph/Dockerfile`;
+ * use that file to build/push a custom image and set `runtime.docker_image` or
+ * `JAIPH_DOCKER_IMAGE` to reference it.
  *
- * All images are verified to contain `jaiph` before use. If the image
- * lacks jaiph, the run fails immediately with guidance.
+ * Verifies that `jaiph` exists in the image before use (`E_DOCKER_NO_JAIPH`).
  */
-export function resolveImage(config: DockerRunConfig, workspaceRoot: string): string {
-  let image = config.image;
-  if (!config.imageExplicit) {
-    const dockerfilePath = join(workspaceRoot, ".jaiph", "Dockerfile");
-    if (existsSync(dockerfilePath)) {
-      image = buildImageFromDockerfile(dockerfilePath);
-    } else {
-      pullImageIfNeeded(image);
-    }
-  } else {
-    pullImageIfNeeded(image);
-  }
+export function resolveImage(config: DockerRunConfig): string {
+  const image = config.image;
+  pullImageIfNeeded(image);
   verifyImageHasJaiph(image);
   return image;
 }
@@ -663,7 +654,7 @@ export interface DockerSpawnResult {
  */
 export function spawnDockerProcess(opts: DockerSpawnOptions): DockerSpawnResult {
   checkDockerAvailable();
-  const resolvedImage = resolveImage(opts.config, opts.workspaceRoot);
+  const resolvedImage = resolveImage(opts.config);
   opts = { ...opts, config: { ...opts.config, image: resolvedImage } };
 
   mkdirSync(opts.sandboxRunDir, { recursive: true });

From 712c5b963a846cd3f718065953aacea52ddc84a9 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Mon, 20 Apr 2026 22:22:59 +0200
Subject: [PATCH 22/38] Docs: Drop historical phrasing; relocate runtime image
 to runtime/

- Move published Docker recipe to runtime/Dockerfile; CI builds from runtime/.
- jaiph init: stop creating .jaiph/Dockerfile; bootstrap prompt and tests/e2e updated.
- Reference docs: describe current behavior only (sandboxing patches, inbox send,
  grammar/testing notes, jaiph-skill, libraries comment).

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 .github/workflows/docker-publish.yml |  4 +-
 docker/Dockerfile.runtime            |  2 +-
 docs/cli.md                          |  3 +-
 docs/configuration.md                |  6 +--
 docs/grammar.md                      |  2 +-
 docs/inbox.md                        |  2 +-
 docs/jaiph-skill.md                  | 11 ++--
 docs/libraries.md                    |  2 +-
 docs/sandboxing.md                   | 12 ++---
 docs/setup.md                        |  3 +-
 docs/testing.md                      |  2 +-
 e2e/tests/00_install_and_init.sh     | 58 +++------------------
 {.jaiph => runtime}/Dockerfile       |  0
 src/cli/commands/init.test.ts        | 10 +---
 src/cli/commands/init.ts             | 78 ++--------------------------
 src/runtime/docker.ts                |  5 +-
 16 files changed, 38 insertions(+), 162 deletions(-)
 rename {.jaiph => runtime}/Dockerfile (100%)

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 50e9543d..ab7c4b01 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -43,8 +43,8 @@ jobs:
       - name: Build and push
         uses: docker/build-push-action@v6
         with:
-          context: .jaiph
-          file: .jaiph/Dockerfile
+          context: runtime
+          file: runtime/Dockerfile
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           build-args: |
diff --git a/docker/Dockerfile.runtime b/docker/Dockerfile.runtime
index 0e3baad6..c45972d3 100644
--- a/docker/Dockerfile.runtime
+++ b/docker/Dockerfile.runtime
@@ -5,7 +5,7 @@
 # copy-on-write workspace overlay.
 #
 # Agent CLIs (Claude Code, cursor-agent) are excluded to keep the image small.
-# To add them, extend this image or use a custom .jaiph/Dockerfile.
+# To add them, extend this image or build a custom image and set JAIPH_DOCKER_IMAGE.
 #
 # Tags:
 #   :<semver>  — matches the jaiph npm package version (built on release)
diff --git a/docs/cli.md b/docs/cli.md
index b7b07047..4f897972 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -303,8 +303,7 @@ jaiph init [workspace-path]
 Creates:
 
 - `.jaiph/.gitignore` — lists `runs` and `tmp`. If the file already exists and does not match this exact list, `jaiph init` exits with a non-zero status.
-- `.jaiph/bootstrap.jh` — canonical bootstrap workflow; made executable. The template uses a triple-quoted multiline prompt body (`prompt """ ... """`) so the generated file parses and compiles as valid Jaiph. It also asks the agent to review/update `.jaiph/Dockerfile` for this repository and ends by logging a summary (`WHAT CHANGED` + `WHY`).
-- `.jaiph/Dockerfile` — Docker sandbox template that extends the official `ghcr.io/jaiphlang/jaiph-runtime:nightly` image with agent CLIs (Claude Code, cursor-agent). The base image already contains Node.js, jaiph, and `fuse-overlayfs`, so the generated Dockerfile only adds project-specific tooling. `jaiph run` does not build this file automatically; build or publish the image yourself and set `runtime.docker_image` / `JAIPH_DOCKER_IMAGE` if you need a custom image. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note.
+- `.jaiph/bootstrap.jh` — canonical bootstrap workflow; made executable. The template uses a triple-quoted multiline prompt body (`prompt """ ... """`) so the generated file parses and compiles as valid Jaiph. It asks the agent to scaffold workflows under `.jaiph/` and ends by logging a summary (`WHAT CHANGED` + `WHY`). Docker sandboxing uses the default `ghcr.io/jaiphlang/jaiph-runtime` image unless you set `runtime.docker_image` or `JAIPH_DOCKER_IMAGE`.
 - `.jaiph/SKILL.md` — copied from the skill file bundled with your Jaiph installation (or from `JAIPH_SKILL_PATH` when set). If no skill file is found, this file is not written and a note is printed.
 
 ## `jaiph install`
diff --git a/docs/configuration.md b/docs/configuration.md
index b6fec6b1..e6ef897d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -167,12 +167,12 @@ workflow default() {
 
 These configure Docker sandboxing. Unlike agent and run keys, runtime keys are resolved by the `jaiph run` CLI at launch — not by the workflow runtime. They can only appear in **module-level** config blocks (not workflow-level).
 
-> Docker sandboxing is in **beta**. See [Sandboxing](sandboxing.md) for mounts, workspace layout, Dockerfile detection, env forwarding, path remapping, and container behavior.
+> Docker sandboxing is in **beta**. See [Sandboxing](sandboxing.md) for mounts, workspace layout, env forwarding, path remapping, and container behavior.
 
 | Key | Type | Default | Env variable | Description |
 |-----|------|---------|--------------|-------------|
 | `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. |
-| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, uses the official GHCR image tag matching the installed jaiph version. A workspace `.jaiph/Dockerfile` is not built automatically on `jaiph run` — build and push (or tag locally) your own image, then set this key or `JAIPH_DOCKER_IMAGE`. |
+| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, uses the official GHCR image tag matching the installed jaiph version. For a custom image, build and push (or tag locally), then set this key or `JAIPH_DOCKER_IMAGE`. |
 | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. |
 | `runtime.workspace` | string[] | `[".:/jaiph/workspace:rw"]` | _(no env override)_ | Mount list. Only settable via in-file config or defaults. |
@@ -350,4 +350,4 @@ The runtime also sets `JAIPH_ARTIFACTS_DIR` — the absolute path to the writabl
 
 ## Created by `jaiph init`
 
-`jaiph init` creates `.jaiph/bootstrap.jh`, `.jaiph/Dockerfile`, and writes `.jaiph/SKILL.md` from the skill file bundled with your installation (see `JAIPH_SKILL_PATH` in the CLI reference). It does not add a separate config file — use `config { ... }` in your workflow sources.
+`jaiph init` creates `.jaiph/bootstrap.jh` and writes `.jaiph/SKILL.md` from the skill file bundled with your installation (see `JAIPH_SKILL_PATH` in the CLI reference). It does not add a separate config file — use `config { ... }` in your workflow sources.
diff --git a/docs/grammar.md b/docs/grammar.md
index f0346ab0..60538818 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -468,7 +468,7 @@ prompt "Review the following code for security issues"
 answer = prompt "Summarize the report"
 ```
 
-If a `"` string has no closing quote on the same line, the parser rejects it with: `multiline prompt strings are no longer supported; use a triple-quoted block instead`.
+If a `"` string has no closing quote on the same line, the parser rejects it — multiline prompt text must use a triple-quoted block (`prompt """..."""`).
 
 **2. Identifier reference**
 
diff --git a/docs/inbox.md b/docs/inbox.md
index c2349f90..e3afe517 100644
--- a/docs/inbox.md
+++ b/docs/inbox.md
@@ -109,7 +109,7 @@ workflow researcher() {
 }
 ```
 
-An explicit RHS is always required — bare `channel <-` (forward syntax) is no longer supported.
+An explicit RHS is always required — bare `channel <-` (without a value) is invalid.
 
 The `<-` operator is only recognized when it appears outside of quoted strings
 on the surrounding line so channel names and literals are not misread as send
diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md
index e3e07c8c..584561e3 100644
--- a/docs/jaiph-skill.md
+++ b/docs/jaiph-skill.md
@@ -21,7 +21,7 @@ The **JS kernel** (`src/runtime/kernel/`) handles **prompt** execution, **manage
 
 **Test lane:** `jaiph test` runs **`*.test.jh`** in-process (`node-test-runner.ts`): **`buildScripts(workspace)`**, then **`buildRuntimeGraph(testFile)` once per file**, mocks, and assertions — same `NodeWorkflowRuntime` as `jaiph run`.
 
-**After `jaiph init`**, a repository gets `.jaiph/bootstrap.jh` (a triple-quoted prompt that tells the agent to read `.jaiph/SKILL.md`), `.jaiph/Dockerfile` (project sandbox image template), and a copy of this file. The bootstrap prompt explicitly asks the agent to review/update `.jaiph/Dockerfile` for the current repo and to end with a clear `WHAT CHANGED` + `WHY` summary. The expected outcome is a **minimal workflow set** for safe feature work: preflight checks, an implementation workflow, verification, and a `workflow default` entrypoint that wires them together (with an optional human-or-agent “review” step when you use a task queue).
+**After `jaiph init`**, a repository gets `.jaiph/bootstrap.jh` (a triple-quoted prompt that tells the agent to read `.jaiph/SKILL.md`) and a copy of this file. The bootstrap prompt asks the agent to scaffold workflows under `.jaiph/` and to end with a clear `WHAT CHANGED` + `WHY` summary. The expected outcome is a **minimal workflow set** for safe feature work: preflight checks, an implementation workflow, verification, and a `workflow default` entrypoint that wires them together (with an optional human-or-agent “review” step when you use a task queue). Docker-backed runs use the official `ghcr.io/jaiphlang/jaiph-runtime` image by default; see [Sandboxing](sandboxing.md) to override with `runtime.docker_image` or `JAIPH_DOCKER_IMAGE`.
 
 **Concepts:**
 
@@ -53,7 +53,7 @@ Use this loop whenever you add or change Jaiph workflows so failures surface bef
 | `jaiph run <file.jh> [args...]` | Execute `workflow default` in the given file |
 | `jaiph test [path]` | Run `*.test.jh` test files (workspace, directory, or single file) |
 | `jaiph format [--check] <file.jh ...>` | Reformat `.jh` files (or verify formatting without writing) |
-| `jaiph init [workspace]` | Scaffold `.jaiph/` with bootstrap workflow, Dockerfile template, and skill file |
+| `jaiph init [workspace]` | Scaffold `.jaiph/` with bootstrap workflow and skill file |
 | `jaiph install [url[@version]]` | Install or restore project-scoped libraries under `.jaiph/libs/` |
 | `jaiph use <version\|nightly>` | Reinstall Jaiph at a specific version or nightly |
 
@@ -79,7 +79,7 @@ Ignore any outdated Markdown that contradicts the above.
 
 A **minimal workflow set** under `.jaiph/` that matches the delivery loop above:
 
-1. **Sandbox baseline** — Review/update `.jaiph/Dockerfile` first so container execution matches the repository's actual build/test/runtime/tooling needs. Keep Jaiph installed via `curl -fsSL https://jaiph.org/install | bash`.
+1. **Sandbox baseline (optional)** — If the repo uses Docker sandboxing, confirm `runtime.docker_image` / `JAIPH_DOCKER_IMAGE` match the tooling the team needs; the default is `ghcr.io/jaiphlang/jaiph-runtime` (see [Sandboxing](sandboxing.md)).
 2. **Preflight** — Rules and `ensure` for repo state and required tools (e.g. clean git, required binaries). Expose a small workflow (e.g. `workflow default` in `readiness.jh`) that runs these checks.
 3. **Review (optional)** — A workflow that reviews queued tasks before development starts (any filename, e.g. `ba_review.jh`). An agent prompt evaluates the next task for clarity, consistency, conflicts, and feasibility, then either marks it as ready or exits with questions. The implementation workflow gates on this marker so unreviewed tasks cannot proceed. This repository’s `.jaiph/architect_review.jh` is one concrete example; it uses `QUEUE.md` as the task queue.
 4. **Implementation** — A workflow that drives coding changes (typically via `prompt`), e.g. `workflow implement` in `main.jh`. When using a task queue, the implementation workflow should check that the first task is marked as ready (e.g. via a `<!-- dev-ready -->` marker) before proceeding.
@@ -98,7 +98,7 @@ Prefer composable modules over one large file.
   - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<name>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Optionally `run ref([args]) recover (<name>) <body>`: repair-and-retry loop — on failure, binds error output, runs the repair body, and retries the target. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. `recover` and `catch` are mutually exclusive on the same step. Workflows only. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above.
   - **log** — `log "message"` writes the expanded message to **stdout** and emits a **`LOG`** event; the CLI shows it in the progress tree at the current depth. Double-quoted string; `${identifier}` interpolation works at runtime. For multiline messages, use triple quotes: `log """..."""`. **Bare identifier form:** `log foo` (no quotes) expands to `log "${foo}"` — the variable's value is logged. Works with `const`, capture, and named parameters. **Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` execute a managed call and inline the result (e.g. `log "Got: ${run greet()}"`). Nested inline captures are rejected. **`LOG`** events and `run_summary.jsonl` store the **same** message string (JSON-escaped for the payload). No spinner, no timing — a static annotation. See [CLI Reference](cli.md) for tree formatting. Useful for marking workflow phases (e.g. `log "Starting analysis phase"`).
   - **logerr** — `logerr "message"` is identical to `log` except the message goes to **stderr** and the event type is **`LOGERR`**. In the progress tree, `logerr` lines use a red `!` instead of the dim `ℹ` used by `log`. Same quoting, interpolation, bare identifier, and triple-quote rules as `log` (e.g. `logerr err_msg`, `logerr """..."""`).
-  - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (forward syntax) has been removed. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md).
+  - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (without a value) is invalid. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md).
   - **Route** — Routes are declared **at the top level** on channel declarations: `channel name -> workflow_ref` or `channel name -> wf1, wf2`. A `->` inside a workflow body is a **parse error** with guidance to move it to the channel declaration. When a message arrives on the channel, the runtime calls each listed **workflow** (local or `alias.workflow`), binding the dispatch values (message, channel, sender) to the target's 3 declared parameters. Route targets must declare exactly 3 parameters. Scripts and rules are not valid route targets. The dispatch queue drains after the orchestrator completes. **`NodeWorkflowRuntime` does not cap dispatch iterations** — avoid circular sends that grow the queue without bound. See [Inbox & Dispatch](inbox.md).
   - **Bindings and capture** — `const name = …` (the `const` keyword is required for all captures). For **`ensure`** / **`run` to a workflow or rule**, capture is the callee’s explicit **`return "…"`**. For **`run` to a script**, capture follows **stdout** from the script body. **`prompt`** capture is the agent answer. **`const`** RHS cannot use `$(...)` or disallowed `${...}` forms — use a **`script`** and `const x = run helper(…)`. **`const`** must not use a **bare** `ref(args…)` call shape: use **`const x = run ref(args…)`** (or **`ensure`** for rules), not **`const x = ref(args…)`** — the compiler fails with **`E_PARSE`** and suggests the **`run`** form. Do not put Jaiph symbols inside `$(...)` — use `ensure` / `run`. See [Grammar](grammar.md#step-output-contract).
   - **return** — `return "value"` / `return "${var}"` / `return """..."""` sets the managed return value. Also supports **direct managed calls**: `return run ref()` or `return run ref(args)` and `return ensure ref()` or `return ensure ref(args)` — these execute the target and use its result as the return value, equivalent to `const x = run ref(args)` then `return "${x}"`. Parentheses are required on all call sites.
@@ -161,7 +161,7 @@ Conventions:
 - **Parallelism:** `run async ref([args...])` for managed async with implicit join. For concurrent **bash**, use `&` and the shell builtin `wait` inside a **`script`** and call it with `run`. Do not call Jaiph internals from background subprocesses unless you understand `run.inbox_parallel` locking.
 - **Shell conditions:** Express conditionals with `run` to a **script** and handle failure with `catch`, or use `if` / `match` for value branching. Short-circuit brace groups remain valid **inside `script`** bodies: `cmd || { ... }`.
 - **No shell redirection around managed calls:** `run foo() > file`, `run foo() | cmd`, `run foo() &` are all `E_PARSE` errors — shell operators (`>`, `>>`, `|`, `&`) are not supported adjacent to `run` or `ensure` steps. Move shell pipelines and redirections into a **`script`** block and call it with `run`.
-- **Script reuse:** Prefer `import script "./tool.py" as tool` (or a sibling `.jh` module) instead of maintaining ad-hoc bash outside the compiler. Do not rely on a workspace-level shared-bash directory; that mechanism is being removed from the product (see `QUEUE.md`).
+- **Script reuse:** Prefer `import script "./tool.py" as tool` (or a sibling `.jh` module) instead of maintaining ad-hoc bash outside the compiler. Avoid informal workspace-level shared-bash directories that bypass the module graph.
 - **Unified namespace:** Channels, rules, workflows, scripts, script import aliases, and module-scoped `local`/`const` share a single namespace per module (`E_PARSE` on collision).
 - **Calling conventions (compiler-enforced):** `ensure` must target a rule — using it on a workflow or script is `E_VALIDATE`. `run` in a **workflow** must target a workflow or script; `run` in a **rule** must target a **script** only. **Type crossing:** `string` and `script` are distinct primitive types — `prompt` rejects script names, `run` rejects string consts, assigning a script to a `const` or interpolating `${scriptName}` are all `E_VALIDATE`. See [Grammar — Types](grammar.md#types). Jaiph symbols must not appear inside `$(...)` in bash contexts the compiler still scans (principally **`script`** bodies). Script bodies cannot contain `run`, `ensure`, `config`, nested definitions, routes, or Jaiph `fail` / `const` / `log` / `logerr` / `return "…"`.
 
@@ -217,7 +217,6 @@ test "handles failure gracefully" {
 ## Suggested Starter Layout
 
 - `.jaiph/bootstrap.jh` — Created by `jaiph init`; contains a single triple-quoted prompt (`prompt """ ... """`) that points the agent at `.jaiph/SKILL.md` (a copy of this guide).
-- `.jaiph/Dockerfile` — Created by `jaiph init`; base Docker sandbox template. Review and tailor runtime/build/test tooling to the current repository.
 - `.jaiph/readiness.jh` — Preflight: rules and `workflow default` that runs readiness checks.
 - `.jaiph/ba_review.jh` (or any name you choose) — (Optional) Pre-implementation review: reads tasks from a queue file, sends one to an agent for review, and marks it dev-ready or exits with questions. This repository uses `.jaiph/architect_review.jh` with `QUEUE.md`.
 - `.jaiph/verification.jh` — Verification: rules and `workflow default` for lint/test/build.
diff --git a/docs/libraries.md b/docs/libraries.md
index c4357d1a..d852e9e6 100644
--- a/docs/libraries.md
+++ b/docs/libraries.md
@@ -64,7 +64,7 @@ workflow default() {
   # Returns the absolute path of the saved patch file.
   const patch = run artifacts.save_patch("snapshot.patch")
 
-  # Apply a previously-saved patch to the current workspace.
+  # Apply a saved patch to the current workspace.
   run artifacts.apply_patch(patch)
 }
 ```
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index d458ed83..518afd78 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -145,7 +145,7 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run
 
 **Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. The host checkout is bind-mounted read-only; the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer (fuse-overlayfs or copy fallback) whose state is discarded on container exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract.
 
-**Workspace patch export** -- The runtime no longer automatically exports a `workspace.patch` during teardown. Workflows that need to capture workspace changes should use the `artifacts.save_patch(name)` library function from `jaiphlang/artifacts`, which writes a named `git diff` patch into the `artifacts/` subdirectory. This is explicit and composable — callers choose when and what to capture, and the patch lands in the standard artifacts directory alongside other published files. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). The bundled `.jaiph/Dockerfile` image includes `git`.
+**Workspace patch export** -- To capture workspace changes as a patch, call `artifacts.save_patch(name)` from `jaiphlang/artifacts`; it writes a named `git diff` into the `artifacts/` subdirectory under the run tree. Callers choose when and what to record; output lives alongside other artifacts. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). The published GHCR runtime image includes `git`.
 
 **Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment.
 
@@ -183,19 +183,19 @@ Jaiph publishes official runtime images to GHCR:
 | `ghcr.io/jaiphlang/jaiph-runtime:nightly` | `nightly` branch | Contributors and CI |
 | `ghcr.io/jaiphlang/jaiph-runtime:latest` | Latest release tag | Convenience alias |
 
-The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. Published tags (`:<semver>`, `:nightly`, `:latest`) are built from this repository’s `.jaiph/Dockerfile` (see CI). The image includes Node.js, jaiph, `fuse-overlayfs`, agent CLIs where the Dockerfile installs them, and a non-root `jaiph` user (UID 10001).
+The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. Published tags (`:<semver>`, `:nightly`, `:latest`) are built from the `runtime/Dockerfile` in the jaiph repository (see `.github/workflows/docker-publish.yml`). The image includes Node.js, jaiph, `fuse-overlayfs`, agent CLIs where that Dockerfile installs them, and a non-root `jaiph` user (UID 10001).
 
-### `.jaiph/Dockerfile` vs `jaiph run`
+### Custom images and `jaiph run`
 
-`jaiph run` **always** uses the configured image (`runtime.docker_image`, `JAIPH_DOCKER_IMAGE`, or the default GHCR tag above). It **does not** run `docker build` against `.jaiph/Dockerfile` on each run. Use that file as the recipe to build and publish (or `docker build` + `docker tag`) an image, then point `runtime.docker_image` / `JAIPH_DOCKER_IMAGE` at it.
+`jaiph run` **always** uses the configured image (`runtime.docker_image`, `JAIPH_DOCKER_IMAGE`, or the default GHCR tag above). It does not run `docker build` for you. Build and publish (or `docker build` + `docker tag`) your own image, then set `runtime.docker_image` / `JAIPH_DOCKER_IMAGE`.
 
 After the image is pulled or found locally, Jaiph verifies that `jaiph` is available inside the container. If the check fails, the run exits with `E_DOCKER_NO_JAIPH`.
 
-The `jaiph init` scaffold generates a `.jaiph/Dockerfile` for project-specific sandbox tooling. `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
+`overlay-run.sh` is generated by the host CLI and mounted into the container at runtime.
 
 ### Extending the official image
 
-To add project-specific tools or agent CLIs to the official image, create a `.jaiph/Dockerfile`:
+To add project-specific tools or agent CLIs, extend the published image in your own Dockerfile (build locally or in CI), then point `runtime.docker_image` at the result:
 
 ```dockerfile
 FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
diff --git a/docs/setup.md b/docs/setup.md
index d0c3221b..501c8fc0 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -91,8 +91,7 @@ jaiph init
 This creates a `.jaiph/` directory in your project root with:
 
 - `.jaiph/.gitignore` — ignores ephemeral `runs/` and `tmp/` under `.jaiph/` (workflows and libraries stay tracked)
-- `.jaiph/bootstrap.jh` — an interactive workflow that asks an agent to scaffold recommended workflows for your project. The generated template uses a triple-quoted multiline prompt (`prompt """ ... """`), explicitly asks the agent to review/update `.jaiph/Dockerfile` for this repository's sandbox needs, and logs a final summary of what changed and why
-- `.jaiph/Dockerfile` — generated project sandbox image template (`ubuntu:latest`, common utilities, Node.js LTS, Claude Code CLI, cursor-agent). It installs Jaiph with the default installer path: `curl -fsSL https://jaiph.org/install | bash`
+- `.jaiph/bootstrap.jh` — an interactive workflow that asks an agent to scaffold recommended workflows for your project. The generated template uses a triple-quoted multiline prompt (`prompt """ ... """`) and logs a final summary of what changed and why. Docker-backed runs use the official `ghcr.io/jaiphlang/jaiph-runtime` image by default (see [Sandboxing](sandboxing.md)).
 - `.jaiph/SKILL.md` — the agent skill file for AI assistants authoring `.jh` workflows (from your Jaiph installation, or `JAIPH_SKILL_PATH`)
 
 Run the bootstrap workflow to get started:
diff --git a/docs/testing.md b/docs/testing.md
index 2571ca96..a85ec98b 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -123,7 +123,7 @@ mock script w.helper() {
 }
 ```
 
-The former `mock function` syntax is no longer accepted — the parser emits an error with migration guidance.
+Test stubs use `mock script`, not `mock function`; the latter is a parse error with a fix hint.
 
 ### Workflow run (with capture)
 
diff --git a/e2e/tests/00_install_and_init.sh b/e2e/tests/00_install_and_init.sh
index 7e5dc51c..279db221 100644
--- a/e2e/tests/00_install_and_init.sh
+++ b/e2e/tests/00_install_and_init.sh
@@ -46,16 +46,13 @@ workflow default() {
     Perform these tasks in order:
     1) Analyze repository structure, languages, package manager, and build/test/lint commands.
     2) Detect existing contribution conventions (branching, commit style, CI checks).
-    3) Review and update .jaiph/Dockerfile for this specific repository.
-       - This file defines the Docker sandbox (runtimes, package managers, build tools).
-       - Align it with how this project is actually built and tested.
-    4) Create or update Jaiph workflows under .jaiph/ for safe feature implementation, including:
+    3) Create or update Jaiph workflows under .jaiph/ for safe feature implementation, including:
        - preflight checks (clean git state, branch guards when relevant)
        - implementation workflow
        - verification workflow (tests/lint/build)
-    5) Keep workflows minimal, composable, and specific to this project.
-    6) Print a short usage guide with exact jaiph run commands.
-    7) End your response with:
+    4) Keep workflows minimal, composable, and specific to this project.
+    5) Print a short usage guide with exact jaiph run commands.
+    6) End your response with:
        - WHAT CHANGED: files touched and key edits
        - WHY: tie each edit to repository structure, tests, or sandbox needs
   """
@@ -71,51 +68,10 @@ e2e::pass "bootstrap template matches expected triple-quoted prompt content"
 jaiph compile "${BOOTSTRAP_FILE}"
 e2e::pass "generated bootstrap workflow compiles"
 
-e2e::assert_file_exists "${TEST_DIR}/.jaiph/Dockerfile" ".jaiph/Dockerfile exists"
-expected_dockerfile="$(mktemp)"
-cat > "${expected_dockerfile}" <<'EOF'
-# Extends the official jaiph runtime image with agent CLIs for prompt steps.
-# The base image already contains Node.js, jaiph, and fuse-overlayfs.
-# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly.
-FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
-
-# Generated by jaiph init for project sandboxing.
-# Keep this file aligned with your repository's runtime/build/test needs.
-
-USER root
-
-# Claude Code CLI (Anthropic)
-RUN npm install -g @anthropic-ai/claude-code
-
-USER jaiph
-
-# cursor-agent (Cursor) — install as the runtime user so the binary remains
-# reachable after switching away from root. The installer currently places
-# the CLI in ~/.local/bin and may name it "agent" or "cursor".
-RUN mkdir -p "$HOME/.local/bin" && \
-    curl -fsSL https://cursor.com/install -o /tmp/install-cursor-agent.sh && \
-    bash /tmp/install-cursor-agent.sh && \
-    export PATH="$HOME/.local/bin:$PATH" && \
-    if command -v cursor-agent >/dev/null 2>&1; then \
-      true; \
-    elif command -v agent >/dev/null 2>&1; then \
-      ln -sf "$(command -v agent)" "$HOME/.local/bin/cursor-agent"; \
-    elif command -v cursor >/dev/null 2>&1; then \
-      ln -sf "$(command -v cursor)" "$HOME/.local/bin/cursor-agent"; \
-    fi && \
-    command -v cursor-agent >/dev/null 2>&1 && \
-    rm -f /tmp/install-cursor-agent.sh
-
-# Add project-specific package managers/build tools below as needed.
-
-WORKDIR /jaiph/workspace
-EOF
-if ! cmp -s "${TEST_DIR}/.jaiph/Dockerfile" "${expected_dockerfile}"; then
-  rm -f "${expected_dockerfile}"
-  e2e::fail "Expected .jaiph/Dockerfile to match init template extending official GHCR image"
+if [[ -f "${TEST_DIR}/.jaiph/Dockerfile" ]]; then
+  e2e::fail "Expected jaiph init not to create .jaiph/Dockerfile"
 fi
-rm -f "${expected_dockerfile}"
-e2e::pass ".jaiph/Dockerfile matches expected init template content"
+e2e::pass "jaiph init does not create .jaiph/Dockerfile"
 
 # Bash command substitution strips a trailing newline; compare bytes with cmp.
 if ! cmp -s "${TEST_DIR}/.jaiph/.gitignore" <(printf 'runs\ntmp\n'); then
diff --git a/.jaiph/Dockerfile b/runtime/Dockerfile
similarity index 100%
rename from .jaiph/Dockerfile
rename to runtime/Dockerfile
diff --git a/src/cli/commands/init.test.ts b/src/cli/commands/init.test.ts
index af7fdd21..2d87e43f 100644
--- a/src/cli/commands/init.test.ts
+++ b/src/cli/commands/init.test.ts
@@ -7,7 +7,6 @@ import { runInit } from "./init";
 import { parsejaiph } from "../../parser";
 
 const CANONICAL_GITIGNORE = "runs\ntmp\n";
-const JAIPH_INSTALL_COMMAND = "curl -fsSL https://jaiph.org/install | bash";
 
 function makeTempDir(): string {
   const dir = join(tmpdir(), `jaiph-init-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
@@ -50,16 +49,11 @@ test("init: generated bootstrap uses triple-quoted prompt and parses", () => {
   }
 });
 
-test("init: creates .jaiph/Dockerfile extending official GHCR image", () => {
+test("init: does not create .jaiph/Dockerfile", () => {
   const dir = makeTempDir();
   try {
     assert.equal(runInit([dir]), 0);
-    const dockerfilePath = join(dir, ".jaiph", "Dockerfile");
-    assert.equal(existsSync(dockerfilePath), true);
-    const dockerfile = readFileSync(dockerfilePath, "utf8");
-    assert.equal(dockerfile.includes("ghcr.io/jaiphlang/jaiph-runtime"), true);
-    assert.equal(dockerfile.includes("cursor"), true);
-    assert.equal(dockerfile.includes("claude-code"), true);
+    assert.equal(existsSync(join(dir, ".jaiph", "Dockerfile")), false);
   } finally {
     rmSync(dir, { recursive: true, force: true });
   }
diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts
index 43b16865..daaef957 100644
--- a/src/cli/commands/init.ts
+++ b/src/cli/commands/init.ts
@@ -15,16 +15,13 @@ workflow default() {
     Perform these tasks in order:
     1) Analyze repository structure, languages, package manager, and build/test/lint commands.
     2) Detect existing contribution conventions (branching, commit style, CI checks).
-    3) Review and update .jaiph/Dockerfile for this specific repository.
-       - This file defines the Docker sandbox (runtimes, package managers, build tools).
-       - Align it with how this project is actually built and tested.
-    4) Create or update Jaiph workflows under .jaiph/ for safe feature implementation, including:
+    3) Create or update Jaiph workflows under .jaiph/ for safe feature implementation, including:
        - preflight checks (clean git state, branch guards when relevant)
        - implementation workflow
        - verification workflow (tests/lint/build)
-    5) Keep workflows minimal, composable, and specific to this project.
-    6) Print a short usage guide with exact jaiph run commands.
-    7) End your response with:
+    4) Keep workflows minimal, composable, and specific to this project.
+    5) Print a short usage guide with exact jaiph run commands.
+    6) End your response with:
        - WHAT CHANGED: files touched and key edits
        - WHY: tie each edit to repository structure, tests, or sandbox needs
   """
@@ -35,44 +32,6 @@ workflow default() {
 
 /** Ignores ephemeral dirs under `.jaiph/`; kept in-repo so workflows and libs stay tracked. */
 const JAIPH_DIR_GITIGNORE_TEMPLATE = "runs\ntmp\n";
-const DOCKERFILE_TEMPLATE_MARKER = "# Generated by jaiph init for project sandboxing.";
-const JAIPH_INSTALL_COMMAND = "curl -fsSL https://jaiph.org/install | bash";
-const JAIPH_DOCKERFILE_TEMPLATE = `# Extends the official jaiph runtime image with agent CLIs for prompt steps.
-# The base image already contains Node.js, jaiph, and fuse-overlayfs.
-# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly.
-FROM ghcr.io/jaiphlang/jaiph-runtime:nightly
-
-${DOCKERFILE_TEMPLATE_MARKER}
-# Keep this file aligned with your repository's runtime/build/test needs.
-
-USER root
-
-# Claude Code CLI (Anthropic)
-RUN npm install -g @anthropic-ai/claude-code
-
-USER jaiph
-
-# cursor-agent (Cursor) — install as the runtime user so the binary remains
-# reachable after switching away from root. The installer currently places
-# the CLI in ~/.local/bin and may name it "agent" or "cursor".
-RUN mkdir -p "$HOME/.local/bin" && \\
-    curl -fsSL https://cursor.com/install -o /tmp/install-cursor-agent.sh && \\
-    bash /tmp/install-cursor-agent.sh && \\
-    export PATH="$HOME/.local/bin:$PATH" && \\
-    if command -v cursor-agent >/dev/null 2>&1; then \\
-      true; \\
-    elif command -v agent >/dev/null 2>&1; then \\
-      ln -sf "$(command -v agent)" "$HOME/.local/bin/cursor-agent"; \\
-    elif command -v cursor >/dev/null 2>&1; then \\
-      ln -sf "$(command -v cursor)" "$HOME/.local/bin/cursor-agent"; \\
-    fi && \\
-    command -v cursor-agent >/dev/null 2>&1 && \\
-    rm -f /tmp/install-cursor-agent.sh
-
-# Add project-specific package managers/build tools below as needed.
-
-WORKDIR /jaiph/workspace
-`;
 
 export function runInit(rest: string[]): number {
   const workspaceArg = rest[0] ?? ".";
@@ -86,7 +45,6 @@ export function runInit(rest: string[]): number {
   const jaiphDir = join(workspaceRoot, ".jaiph");
   const gitignorePath = join(jaiphDir, ".gitignore");
   const bootstrapPath = join(jaiphDir, "bootstrap.jh");
-  const dockerfilePath = join(jaiphDir, "Dockerfile");
   const skillPath = join(jaiphDir, "SKILL.md");
   const palette = colorPalette();
 
@@ -124,25 +82,6 @@ export function runInit(rest: string[]): number {
   }
   chmodSync(bootstrapPath, 0o755);
 
-  let createdDockerfile = false;
-  let updatedDockerfile = false;
-  let leftDockerfileUnchanged = false;
-  if (existsSync(dockerfilePath)) {
-    const existingDockerfile = readFileSync(dockerfilePath, "utf8");
-    if (existingDockerfile === JAIPH_DOCKERFILE_TEMPLATE) {
-      leftDockerfileUnchanged = true;
-    } else if (existingDockerfile.includes(DOCKERFILE_TEMPLATE_MARKER)) {
-      writeFileSync(dockerfilePath, JAIPH_DOCKERFILE_TEMPLATE, "utf8");
-      updatedDockerfile = true;
-    } else {
-      leftDockerfileUnchanged = true;
-    }
-  } else {
-    process.stdout.write(`${palette.dim}▸ Creating ${join(".jaiph", "Dockerfile")} in ${workspaceRoot}...${palette.reset}\n`);
-    writeFileSync(dockerfilePath, JAIPH_DOCKERFILE_TEMPLATE, "utf8");
-    createdDockerfile = true;
-  }
-
   const installedSkillPath = resolveInstalledSkillPath();
   let wroteSkill = false;
   if (installedSkillPath) {
@@ -161,15 +100,6 @@ export function runInit(rest: string[]): number {
   if (!createdBootstrap) {
     process.stdout.write(`${palette.dim}▸ Note: bootstrap file already existed; left unchanged.${palette.reset}\n`);
   }
-  if (createdDockerfile) {
-    process.stdout.write(`${palette.green}✓ Created ${join(".jaiph", "Dockerfile")} with Jaiph installer${palette.reset}\n`);
-  } else if (updatedDockerfile) {
-    process.stdout.write(`${palette.green}✓ Updated ${join(".jaiph", "Dockerfile")} with latest init template${palette.reset}\n`);
-  } else if (leftDockerfileUnchanged) {
-    process.stdout.write(
-      `${palette.dim}▸ Note: ${join(".jaiph", "Dockerfile")} already existed; left unchanged. Bootstrap workflow should review it for project sandbox needs.${palette.reset}\n`,
-    );
-  }
   if (wroteSkill) {
     process.stdout.write(`${palette.green}✓ Wrote ${join(".jaiph", "SKILL.md")} from installation${palette.reset}\n`);
   } else {
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 0138fe08..16136a59 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -283,9 +283,8 @@ export function verifyImageHasJaiph(image: string): void {
  *
  * Always uses `config.image` (from env, in-file `runtime.docker_image`, or the
  * default `ghcr.io/jaiphlang/jaiph-runtime:<version>`). Pulls from the registry
- * if the image is not present locally. Does not build from `.jaiph/Dockerfile`;
- * use that file to build/push a custom image and set `runtime.docker_image` or
- * `JAIPH_DOCKER_IMAGE` to reference it.
+ * if the image is not present locally. To use a custom image, build/push it
+ * yourself and set `runtime.docker_image` or `JAIPH_DOCKER_IMAGE`.
  *
  * Verifies that `jaiph` exists in the image before use (`E_DOCKER_NO_JAIPH`).
  */

From f1990188320eab53f4d488db19f2e154e0d49e01 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 06:56:27 +0200
Subject: [PATCH 23/38] Docker: Split sandbox into overlay (Linux) and
 host-clone (macOS) modes

Replaces the in-container rsync/cp fallback (~40s on this repo) with a
host-side workspace clone using cp -cR (APFS clonefile, O(1) per file)
on macOS. Linux keeps fuse-overlayfs as the primary path; copy mode is
selected when /dev/fuse is missing on the host or JAIPH_DOCKER_NO_OVERLAY=1.

OVERLAY_SCRIPT shrinks from ~155 lines to ~22 (no in-container fallback;
host owns the slow path). Copy mode drops SYS_ADMIN, /dev/fuse, and the
overlay-script mount. Clone lives at <runs-root>/.sandbox-<id>/, removed
on exit unless JAIPH_DOCKER_KEEP_SANDBOX=1.

Tests cover both modes plus an explicit guard that the clone produces
independent inodes (writes inside the container do not leak to the host).

Made-with: Cursor
---
 docs/sandboxing.md         |  45 +++-
 src/runtime/docker.test.ts | 209 +++++++++++++++--
 src/runtime/docker.ts      | 461 +++++++++++++++++++++++--------------
 3 files changed, 508 insertions(+), 207 deletions(-)

diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 518afd78..b8e39f28 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -25,8 +25,8 @@ Docker sandboxing is designed to contain damage from untrusted or semi-trusted w
 
 **What Docker protects against:**
 
-- **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The host workspace is mounted read-only; writes go to a tmpfs overlay and are discarded on exit. Only the run-artifacts directory (`/jaiph/run`) persists writes to the host.
-- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (only `SYS_ADMIN` is re-added for fuse-overlayfs) and `--security-opt no-new-privileges` to prevent privilege escalation.
+- **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The container's `/jaiph/workspace` is either an in-container fuse-overlayfs union over a read-only bind of the host workspace (overlay mode, writes land in a tmpfs upper layer and are discarded on exit) or a host-side clone of the workspace mounted read-write (copy mode, the clone is removed on exit). Only the run-artifacts directory (`/jaiph/run`) persists writes back to the host workspace.
+- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` for fuse-overlayfs; copy mode runs without it) and `--security-opt no-new-privileges` to prevent privilege escalation.
 - **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary.
 - **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`.
 
@@ -44,7 +44,12 @@ Docker sandboxing is designed to contain damage from untrusted or semi-trusted w
 
 Docker applies to `jaiph run` only (not `jaiph test`). When enabled, the entire workflow -- every rule and script step -- runs inside a single container. The container runs `jaiph run --raw <file>` using its own installed jaiph -- not the host's. The `--raw` flag makes jaiph emit `__JAIPH_EVENT__` lines to stderr without rendering a progress tree, so the host CLI can render from those events.
 
-The host workspace is mounted **read-only** to prevent bind-mount deadlocks with concurrent runners on macOS Docker Desktop. A `fuse-overlayfs` copy-on-write overlay makes the workspace appear writable inside the container -- reads come from the host mount, writes go to a tmpfs upper layer and are discarded on exit. Run artifacts are written to a separate rw mount at `/jaiph/run` (outside the overlay), so they persist to the host. If `fuse-overlayfs` is unavailable, the workspace stays read-only (no regression).
+The container's `/jaiph/workspace` always *looks* writable to scripts but never mutates the host checkout. The CLI picks one of two sandbox primitives at launch time:
+
+- **Overlay mode** (selected when `/dev/fuse` exists on the host -- typically Linux). The host workspace is bind-mounted read-only at `/jaiph/workspace-ro`. The runtime entrypoint (`overlay-run.sh`) sets up `fuse-overlayfs` with that read-only bind as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. Writes go to the tmpfs and are discarded on container exit. Requires `--cap-add SYS_ADMIN` and `--device /dev/fuse`.
+- **Copy mode** (selected when `/dev/fuse` is missing -- typically macOS Docker Desktop, or when forced via `JAIPH_DOCKER_NO_OVERLAY=1`). Before launching the container, the CLI clones the host workspace (excluding `.jaiph/runs`) into a fresh `<runs-root>/.sandbox-<id>/` directory, then bind-mounts that clone read-write at `/jaiph/workspace`. On macOS the clone uses `cp -cR` (APFS clonefile, near-zero cost); on other platforms it falls back to `cp -pR` and emits a one-line stderr warning. The clone is removed on exit unless `JAIPH_DOCKER_KEEP_SANDBOX=1` is set. No `SYS_ADMIN`, no `/dev/fuse`, no in-container overlay script.
+
+In both modes, run artifacts are written to a separate rw mount at `/jaiph/run` (outside the workspace sandbox) so they persist to the host.
 
 ### Enabling Docker
 
@@ -98,18 +103,18 @@ If `JAIPH_DOCKER_TIMEOUT` is set but not a valid integer, the default (`300`) is
 
 ### Mount specifications
 
-Mount strings in `runtime.workspace` define which host paths are visible inside the container. All mounts are **forced to read-only** regardless of the specified mode to prevent bind-mount deadlocks on macOS Docker Desktop. The overlay wrapper makes the workspace writable via fuse-overlayfs.
+Mount strings in `runtime.workspace` define which host paths are visible inside the container. The mount targeting `/jaiph/workspace` selects the workspace source; additional sub-mounts pin parts of the tree to a particular mode (e.g. `"config:ro"` to make a subdir read-only inside the container).
 
 | Form | Segments | Example | Result |
 |------|----------|---------|--------|
-| Full | 3 | `".:/jaiph/workspace:rw"` | Mount `.` at `/jaiph/workspace` and `/jaiph/workspace-ro` (both read-only; overlay makes workspace writable) |
-| Shorthand | 2 | `"config:ro"` | Mount `config` at `/jaiph/workspace/config` and `/jaiph/workspace-ro/config` (read-only) |
+| Full | 3 | `".:/jaiph/workspace:rw"` | Workspace source. In overlay mode this becomes the read-only lower layer at `/jaiph/workspace-ro`; in copy mode the clone is mounted rw at `/jaiph/workspace`. |
+| Shorthand | 2 | `"config:ro"` | Mount `config` under `/jaiph/workspace/config`. In overlay mode the path is duplicated at `/jaiph/workspace-ro/config`; in copy mode the cloned subdirectory is bound at the requested mode. |
 | Too few | 1 | `"data"` | `E_PARSE` |
 | Too many | 4+ | `"a:b:c:d"` | `E_PARSE` |
 
 Mode must be `ro` or `rw` (otherwise `E_PARSE`). Exactly one mount must target `/jaiph/workspace` -- zero or more than one produces `E_VALIDATE`. The default `[".:/jaiph/workspace:rw"]` satisfies this requirement.
 
-Host paths are resolved relative to the workspace root. Each mount is duplicated at the overlay lower-layer path (`/jaiph/workspace-ro/...`) so the overlay wrapper can use it as the read-only source.
+Host paths are resolved relative to the workspace root. In overlay mode each mount is duplicated at the overlay lower-layer path (`/jaiph/workspace-ro/...`) so the overlay wrapper can use it as the read-only source. In copy mode, sub-mounts under `/jaiph/workspace` are bound from the cloned workspace directory.
 
 The following host paths are rejected at mount validation time with `E_VALIDATE_MOUNT`:
 
@@ -119,31 +124,45 @@ The following host paths are rejected at mount validation time with `E_VALIDATE_
 
 ### Container layout
 
+Overlay mode:
+
 ```
 /jaiph/
   workspace-ro/       # read-only bind mount of host workspace (overlay lower layer)
   workspace/          # fuse-overlayfs merged view (reads from -ro, writes to tmpfs)
     *.jh              # source files
     .jaiph/           # project config
-  run/                # writable bind mount for this run's artifacts (host temp dir)
+  run/                # writable bind mount for this run's artifacts (host runs root)
   overlay-run.sh      # runtime-generated entrypoint mounted ro from host temp file
 ```
 
-The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run.sh` (a ~10 line bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`. The container runs `/jaiph/overlay-run.sh jaiph run --raw <file>`. The overlay wrapper sets up fuse-overlayfs, then execs the jaiph command. The image must already contain `jaiph` — Jaiph does not install itself into the container at runtime. No `COPY` in the project Dockerfile is needed for jaiph runtime files — `overlay-run.sh` is a jaiph runtime artifact.
+Copy mode:
+
+```
+/jaiph/
+  workspace/          # rw bind mount of <runs-root>/.sandbox-<id>/ on the host
+    *.jh              # cloned source files (writes are local to the clone)
+    .jaiph/           # cloned config (.jaiph/runs is excluded from the clone)
+  run/                # writable bind mount for this run's artifacts (host runs root)
+```
+
+The working directory is `/jaiph/workspace`. In overlay mode the host CLI generates `overlay-run.sh` (a short bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`; the container runs `/jaiph/overlay-run.sh jaiph run --raw <file>`. In copy mode the container runs `jaiph run --raw <file>` directly -- no entrypoint script. The image must already contain `jaiph` — Jaiph does not install itself into the container at runtime.
 
 ### Runtime behavior
 
-**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL --cap-add SYS_ADMIN` drops all Linux capabilities except `SYS_ADMIN` (required for fuse-overlayfs). `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. `--device /dev/fuse` exposes the FUSE device for the overlay. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user <uid>:<gid>` maps the container user to the host user.
+**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user <uid>:<gid>` maps the container user to the host user.
 
 **stdin** -- The `docker run` process is spawned with stdin set to `ignore` to prevent the Docker CLI from blocking on stdin EOF.
 
 **Events** -- The container's jaiph runs in `--raw` mode: it spawns the runtime with inherited stdio, so `__JAIPH_EVENT__` JSON flows directly to the container's stderr. The host CLI reads Docker's stderr pipe and renders the progress tree. stdout carries plain script output. `STEP_END` events embed `out_content` (and `err_content` on failure) so consumers do not need host paths to step artifact files.
 
-**Overlay** -- The `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. If fuse-overlayfs is unavailable (e.g. the image doesn't include it), the overlay step is skipped and the workspace remains read-only.
+**Sandbox primitive (overlay vs. copy)** -- Selected at launch time. If `/dev/fuse` exists on the host, the CLI uses **overlay mode**: the `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. If `fuse-overlayfs` is missing from the image at runtime, the entrypoint exits with `E_DOCKER_OVERLAY` -- there is no in-container fallback. Set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to opt into copy mode instead.
+
+If `/dev/fuse` is missing on the host, the CLI uses **copy mode**: before launching the container it clones the workspace into `<runs-root>/.sandbox-<id>/` (excluding `.jaiph/runs`) using `cp -cR` on macOS (APFS clonefile, O(1) per file) or `cp -pR` elsewhere (a real copy; a single stderr warning is printed when the fast path is unavailable). The clone is bind-mounted rw at `/jaiph/workspace`. After the container exits the clone is removed unless `JAIPH_DOCKER_KEEP_SANDBOX=1` is set, in which case the path is left in place and printed to stderr for debugging.
 
 **Run artifacts** -- The host CLI mounts the resolved host runs root at `/jaiph/run:rw` inside the container. By default this is `.jaiph/runs` under the workspace; a relative `JAIPH_RUNS_DIR` is resolved under the workspace; an absolute `JAIPH_RUNS_DIR` must stay within the workspace or the run fails with `E_DOCKER_RUNS_DIR`. `JAIPH_RUNS_DIR` is set to `/jaiph/run` inside the container, so the runtime writes artifacts directly into the requested host path.
 
-**Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. The host checkout is bind-mounted read-only; the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer (fuse-overlayfs or copy fallback) whose state is discarded on container exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract.
+**Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. In overlay mode the host checkout is bind-mounted read-only and writes land in a tmpfs upper layer that is discarded on container exit. In copy mode the container writes to a separate host-side clone of the workspace (`<runs-root>/.sandbox-<id>/`), which is removed on container exit unless explicitly kept for debugging. In both modes the only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract.
 
 **Workspace patch export** -- To capture workspace changes as a patch, call `artifacts.save_patch(name)` from `jaiphlang/artifacts`; it writes a named `git diff` into the `artifacts/` subdirectory under the run tree. Callers choose when and what to record; output lives alongside other artifacts. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). The published GHCR runtime image includes `git`.
 
@@ -164,6 +183,8 @@ Docker-related errors use `E_DOCKER_*` codes for programmatic detection:
 | `E_DOCKER_BUILD` | `docker build` fails when using helpers that build from a Dockerfile | Not used during normal `jaiph run` image resolution. |
 | `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. |
 | `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. |
+| `E_DOCKER_OVERLAY` | Overlay mode selected but `fuse-overlayfs` is missing from the image or the mount fails inside the container | Container exits with code 78. Use the official runtime image, install `fuse-overlayfs` in your custom image, or set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to switch to copy mode. |
+| `E_DOCKER_SANDBOX_COPY` | Copy mode failed to clone the host workspace (`cp` returned non-zero) | Run exits before container launch. Inspect the path printed in the error. |
 | `E_VALIDATE_MOUNT` | Mount targets a denied host path (`/`, `/proc`, docker socket, etc.) | Run exits before container launch. |
 | `E_TIMEOUT` | Container exceeds `runtime.docker_timeout` seconds | Container receives SIGTERM, then SIGKILL after 5s grace period. |
 
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index b36bd64e..b3196ebc 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -15,11 +15,14 @@ import {
   isEnvDenied,
   ENV_DENYLIST_PREFIXES,
   GHCR_IMAGE_REPO,
+  selectSandboxMode,
+  cloneWorkspaceForSandbox,
+  allocateSandboxWorkspaceDir,
   type MountSpec,
   type DockerRunConfig,
   type DockerSpawnOptions,
 } from "./docker";
-import { mkdtempSync, writeFileSync, mkdirSync, existsSync, readFileSync, rmSync } from "node:fs";
+import { mkdtempSync, writeFileSync, mkdirSync, existsSync, readFileSync, readdirSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join, dirname } from "node:path";
 
@@ -50,10 +53,15 @@ function defaultOpts(overrides?: Partial<DockerSpawnOptions>): DockerSpawnOption
     runArgs: [],
     env: {},
     isTTY: false,
+    sandboxMode: "overlay",
     ...overrides,
   };
 }
 
+function copyOpts(sandboxWorkspaceDir: string, overrides?: Partial<DockerSpawnOptions>): DockerSpawnOptions {
+  return defaultOpts({ sandboxMode: "copy", sandboxWorkspaceDir, ...overrides });
+}
+
 // ---------------------------------------------------------------------------
 // parseMount
 // ---------------------------------------------------------------------------
@@ -430,21 +438,23 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     assert.ok(existsSync(scriptPath));
     const content = readFileSync(scriptPath, "utf8");
     assert.ok(content.startsWith("#!/usr/bin/env bash"));
-    assert.ok(content.includes("fuse-overlayfs"));
-    assert.ok(content.includes("workspace overlay unavailable"));
-    assert.ok(content.includes("copying workspace into a temp directory before startup"));
-    assert.ok(content.includes("using copy fallback"));
-    assert.ok(content.includes("live output begins after the copy completes"));
-    assert.ok(content.includes("excludes .jaiph/runs"));
-    assert.ok(content.includes("mktemp -d /tmp/jaiph-workspace."));
-    assert.ok(content.includes("rewrite_workspace_path()"));
-    assert.ok(content.includes("--exclude='.jaiph/runs'"));
-    assert.ok(content.includes('rsync -a --delete --exclude=\'.jaiph/runs\' --no-owner --no-group'));
-    assert.ok(content.includes("case \"$entry\" in"));
-    assert.ok(content.includes(".|..|.jaiph) continue ;;"));
-    assert.ok(content.includes(".|..|runs) continue ;;"));
-    assert.ok(content.includes("mktemp \"$MERGED/.jaiph-overlay-probe.XXXXXX\""));
-    assert.ok(content.includes('exec "${rewritten_args[@]}"'));
+    assert.ok(content.includes("fuse-overlayfs -o"));
+    assert.ok(content.includes("lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK"));
+    assert.ok(content.includes('exec "$@"'));
+    assert.ok(content.includes("E_DOCKER_OVERLAY"));
+  } finally {
+    rmSync(dirname(scriptPath), { recursive: true, force: true });
+  }
+});
+
+test("writeOverlayScript: contains no in-container rsync/cp fallback (host handles it now)", () => {
+  const scriptPath = writeOverlayScript();
+  try {
+    const content = readFileSync(scriptPath, "utf8");
+    assert.ok(!content.includes("rsync"), "rsync fallback removed from container script");
+    assert.ok(!content.includes("copy_workspace_with_cp"), "cp fallback removed from container script");
+    assert.ok(!content.includes("rewrite_workspace_path"), "path-rewrite logic removed");
+    assert.ok(!content.includes("RUNTIME_WORKSPACE"), "workspace switch logic removed");
   } finally {
     rmSync(dirname(scriptPath), { recursive: true, force: true });
   }
@@ -621,3 +631,170 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
 
+// ---------------------------------------------------------------------------
+// buildDockerArgs: copy-mode sandbox (host pre-clones workspace, mounts rw)
+// ---------------------------------------------------------------------------
+
+test("buildDockerArgs: copy mode mounts cloned workspace rw at /jaiph/workspace and skips overlay/fuse/SYS_ADMIN", () => {
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const args = buildDockerArgs(copyOpts(cloneDir));
+    const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v");
+
+    const wsMount = vFlags.find((v) => v.endsWith(":/jaiph/workspace:rw"));
+    assert.ok(wsMount, "workspace bound rw at /jaiph/workspace");
+    assert.ok(wsMount!.startsWith(`${cloneDir}:`), "host side is the cloned workspace");
+    assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace-ro")), "no overlay lower-layer mount in copy mode");
+    assert.ok(!vFlags.some((v) => v.includes("/jaiph/overlay-run.sh")), "no overlay script mount in copy mode");
+
+    assert.ok(!args.includes("/dev/fuse"), "no fuse device in copy mode");
+    assert.ok(!args.includes("SYS_ADMIN"), "no SYS_ADMIN cap in copy mode");
+
+    assert.ok(args.includes("--cap-drop"));
+    assert.ok(args.includes("ALL"));
+    assert.ok(args.includes("--security-opt"));
+    assert.ok(args.includes("no-new-privileges"));
+
+    const idxImage = args.indexOf("ubuntu:24.04");
+    const tail = args.slice(idxImage + 1);
+    assert.equal(tail[0], "jaiph", "no overlay-run.sh wrapper in copy mode");
+    assert.equal(tail[1], "run");
+    assert.equal(tail[2], "--raw");
+    assert.equal(tail[3], "/jaiph/workspace/main.jh");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
+test("buildDockerArgs: copy mode binds run dir rw at /jaiph/run", () => {
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const args = buildDockerArgs(copyOpts(cloneDir));
+    const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v");
+    const runMount = vFlags.find((v) => v.endsWith(":/jaiph/run:rw"));
+    assert.ok(runMount, "run dir bound rw at /jaiph/run");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
+test("buildDockerArgs: copy mode honors workspace sub-mounts as separate binds (e.g. config:ro)", () => {
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const opts = copyOpts(cloneDir, {
+      config: {
+        ...defaultOpts().config,
+        mounts: [
+          { hostPath: ".", containerPath: "/jaiph/workspace", mode: "rw" },
+          { hostPath: "config", containerPath: "/jaiph/workspace/config", mode: "ro" },
+        ],
+      },
+    });
+    const args = buildDockerArgs(opts);
+    const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v");
+    const subMount = vFlags.find((v) => v.endsWith(":/jaiph/workspace/config:ro"));
+    assert.ok(subMount, "config sub-mount present and ro");
+    assert.ok(subMount!.startsWith(`${join(cloneDir, "config")}:`), "config sub-mount points into the cloned workspace");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
+test("buildDockerArgs: throws when overlay mode is selected without script path", () => {
+  assert.throws(() => buildDockerArgs(defaultOpts({ sandboxMode: "overlay" })), /overlay mode requires/);
+});
+
+test("buildDockerArgs: throws when copy mode is selected without sandboxWorkspaceDir", () => {
+  assert.throws(
+    () => buildDockerArgs(defaultOpts({ sandboxMode: "copy", sandboxWorkspaceDir: undefined })),
+    /copy mode requires sandboxWorkspaceDir/,
+  );
+});
+
+// ---------------------------------------------------------------------------
+// selectSandboxMode
+// ---------------------------------------------------------------------------
+
+test("selectSandboxMode: JAIPH_DOCKER_NO_OVERLAY=1 forces copy", () => {
+  assert.equal(selectSandboxMode({ JAIPH_DOCKER_NO_OVERLAY: "1" }), "copy");
+  assert.equal(selectSandboxMode({ JAIPH_DOCKER_NO_OVERLAY: "true" }), "copy");
+});
+
+test("selectSandboxMode: returns overlay iff /dev/fuse exists on host (platform-correlated)", () => {
+  const expected = existsSync("/dev/fuse") ? "overlay" : "copy";
+  assert.equal(selectSandboxMode({}), expected);
+});
+
+// ---------------------------------------------------------------------------
+// cloneWorkspaceForSandbox + allocateSandboxWorkspaceDir
+// ---------------------------------------------------------------------------
+
+test("cloneWorkspaceForSandbox: copies entries and excludes .jaiph/runs", () => {
+  const src = mkdtempSync(join(tmpdir(), "jaiph-clone-src-"));
+  const dst = mkdtempSync(join(tmpdir(), "jaiph-clone-dst-"));
+  try {
+    writeFileSync(join(src, "file.txt"), "hello");
+    mkdirSync(join(src, "subdir"), { recursive: true });
+    writeFileSync(join(src, "subdir", "nested.txt"), "nested");
+    mkdirSync(join(src, ".jaiph"), { recursive: true });
+    writeFileSync(join(src, ".jaiph", "engineer.jh"), "wf");
+    mkdirSync(join(src, ".jaiph", "runs", "2026-01-01"), { recursive: true });
+    writeFileSync(join(src, ".jaiph", "runs", "2026-01-01", "log.txt"), "PII");
+
+    cloneWorkspaceForSandbox(src, dst);
+
+    assert.equal(readFileSync(join(dst, "file.txt"), "utf8"), "hello");
+    assert.equal(readFileSync(join(dst, "subdir", "nested.txt"), "utf8"), "nested");
+    assert.equal(readFileSync(join(dst, ".jaiph", "engineer.jh"), "utf8"), "wf");
+    assert.ok(!existsSync(join(dst, ".jaiph", "runs")), ".jaiph/runs must NOT be copied");
+  } finally {
+    rmSync(src, { recursive: true, force: true });
+    rmSync(dst, { recursive: true, force: true });
+  }
+});
+
+test("cloneWorkspaceForSandbox: produces independent file inodes (writes do not leak to source)", () => {
+  // Guards against the broken cp-rl/hardlink design we explicitly avoided.
+  const src = mkdtempSync(join(tmpdir(), "jaiph-clone-src-"));
+  const dst = mkdtempSync(join(tmpdir(), "jaiph-clone-dst-"));
+  try {
+    writeFileSync(join(src, "leak-check.txt"), "original");
+    cloneWorkspaceForSandbox(src, dst);
+    writeFileSync(join(dst, "leak-check.txt"), "mutated-by-container");
+    assert.equal(
+      readFileSync(join(src, "leak-check.txt"), "utf8"),
+      "original",
+      "host file must not be mutated by writes inside the cloned workspace",
+    );
+  } finally {
+    rmSync(src, { recursive: true, force: true });
+    rmSync(dst, { recursive: true, force: true });
+  }
+});
+
+test("cloneWorkspaceForSandbox: empty workspace produces empty clone", () => {
+  const src = mkdtempSync(join(tmpdir(), "jaiph-clone-src-"));
+  const dst = mkdtempSync(join(tmpdir(), "jaiph-clone-dst-"));
+  try {
+    cloneWorkspaceForSandbox(src, dst);
+    assert.deepStrictEqual(readdirSync(dst), []);
+  } finally {
+    rmSync(src, { recursive: true, force: true });
+    rmSync(dst, { recursive: true, force: true });
+  }
+});
+
+test("allocateSandboxWorkspaceDir: creates a fresh .sandbox-* dir under the runs root", () => {
+  const runsRoot = mkdtempSync(join(tmpdir(), "jaiph-runs-"));
+  try {
+    const a = allocateSandboxWorkspaceDir(runsRoot);
+    const b = allocateSandboxWorkspaceDir(runsRoot);
+    assert.notEqual(a, b);
+    assert.ok(a.startsWith(join(runsRoot, ".sandbox-")));
+    assert.ok(b.startsWith(join(runsRoot, ".sandbox-")));
+    assert.ok(existsSync(a) && existsSync(b));
+  } finally {
+    rmSync(runsRoot, { recursive: true, force: true });
+  }
+});
+
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 16136a59..417927ad 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -1,5 +1,6 @@
-import { execFileSync, execSync, spawn, ChildProcess } from "node:child_process";
-import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { execFileSync, execSync, spawn, spawnSync, ChildProcess } from "node:child_process";
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
+import { randomBytes } from "node:crypto";
 import { tmpdir } from "node:os";
 import { join, resolve, dirname, relative } from "node:path";
 import type { RuntimeConfig } from "../types";
@@ -299,156 +300,39 @@ export function resolveImage(config: DockerRunConfig): string {
 // Overlay entrypoint script (written to temp file, mounted into container)
 // ---------------------------------------------------------------------------
 
+/**
+ * Container-side fuse-overlayfs setup.
+ *
+ * Used only when the host selects "overlay" sandbox mode (i.e. /dev/fuse exists
+ * on the host). Mounts a fuse-overlayfs union at /jaiph/workspace (lower = the
+ * host workspace bind-mounted ro at /jaiph/workspace-ro, upper = tmpfs) and
+ * execs the command. If fuse-overlayfs is missing or fails, the script exits
+ * with a clear error code; the host-copy mode is the documented fallback users
+ * opt into (e.g. when fuse is unavailable on macOS Docker Desktop).
+ *
+ * No in-container rsync/cp fallback. That path was the slow one — we replaced
+ * it with a host-side clone (see `cloneWorkspaceForSandbox`).
+ */
 const OVERLAY_SCRIPT = `#!/usr/bin/env bash
 set -euo pipefail
 LOWER=/jaiph/workspace-ro
 UPPER=/tmp/overlay-upper
 WORK=/tmp/overlay-work
 MERGED=/jaiph/workspace
-RUNTIME_WORKSPACE="$MERGED"
 mkdir -p "$UPPER" "$WORK" "$MERGED"
 
-rewrite_workspace_path() {
-  local value="$1"
-  if [ "$RUNTIME_WORKSPACE" = "$MERGED" ]; then
-    printf '%s' "$value"
-    return
-  fi
-  case "$value" in
-    "$MERGED")
-      printf '%s' "$RUNTIME_WORKSPACE"
-      ;;
-    "$MERGED"/*)
-      printf '%s' "$RUNTIME_WORKSPACE"\${value#$MERGED}
-      ;;
-    *)
-      printf '%s' "$value"
-      ;;
-  esac
-}
-
-copy_workspace_with_rsync() {
-  local target="$1"
-  rsync -a --delete --exclude='.jaiph/runs' --no-owner --no-group --chmod=Du+rwx,Dgo+rx,Fu+rw,Fgo+r "$LOWER"/ "$target"/
-}
-
-copy_workspace_with_cp() {
-  local target="$1"
-  mkdir -p "$target"
-  (
-    cd "$LOWER"
-    shopt -s dotglob nullglob
-    for entry in * .*; do
-      case "$entry" in
-        .|..|.jaiph) continue ;;
-      esac
-      cp -a --no-preserve=ownership "$entry" "$target"/
-    done
-    if [ -d ".jaiph" ]; then
-      mkdir -p "$target/.jaiph"
-      (
-        cd ".jaiph"
-        shopt -s dotglob nullglob
-        for entry in * .*; do
-          case "$entry" in
-            .|..|runs) continue ;;
-          esac
-          cp -a --no-preserve=ownership "$entry" "$target/.jaiph"/
-        done
-      )
-    fi
-  )
-  chmod -R u+rwX "$target" 2>/dev/null || true
-}
-
-overlay_ok=0
-overlay_reason=""
-if command -v fuse-overlayfs >/dev/null 2>&1 && [ -e /dev/fuse ]; then
-  if fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
-    probe_path="$(mktemp "$MERGED/.jaiph-overlay-probe.XXXXXX" 2>/dev/null || true)"
-    if [ -n "$probe_path" ]; then
-      rm -f "$probe_path"
-      overlay_ok=1
-    else
-      overlay_reason="fuse-overlayfs mounted but workspace is still not writable"
-    fi
-  else
-    overlay_reason="$(tr '\n' ' ' </tmp/jaiph-fuse-overlay.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
-  fi
-else
-  overlay_reason="fuse-overlayfs unavailable or /dev/fuse missing"
+if ! command -v fuse-overlayfs >/dev/null 2>&1; then
+  printf 'E_DOCKER_OVERLAY fuse-overlayfs not found in image; install it or set JAIPH_DOCKER_NO_OVERLAY=1 on the host to use the copy sandbox path\\n' >&2
+  exit 78
 fi
-if [ "$overlay_ok" -ne 1 ]; then
-  printf 'jaiph docker: workspace overlay unavailable; copying workspace into a temp directory before startup (live output begins after the copy completes; excludes .jaiph/runs)' >&2
-  if [ -n "$overlay_reason" ]; then
-    printf ' (%s)' "$overlay_reason" >&2
-  fi
-  printf '\n' >&2
-  tmp_workspace="$(mktemp -d /tmp/jaiph-workspace.XXXXXX 2>/dev/null || true)"
-  if [ -n "$tmp_workspace" ]; then
-    if command -v rsync >/dev/null 2>&1; then
-      if copy_workspace_with_rsync "$tmp_workspace" 2>/tmp/jaiph-workspace-copy.err; then
-        RUNTIME_WORKSPACE="$tmp_workspace"
-        printf 'jaiph docker: workspace overlay unavailable; using copy fallback at %s' "$RUNTIME_WORKSPACE" >&2
-        if [ -n "$overlay_reason" ]; then
-          printf ' (%s)' "$overlay_reason" >&2
-        fi
-        printf '\n' >&2
-        overlay_ok=1
-      else
-        copy_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-copy.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
-        rm -rf "$tmp_workspace"
-        printf 'jaiph docker: workspace overlay unavailable and copy fallback failed; container workspace may be incomplete' >&2
-        if [ -n "$overlay_reason" ]; then
-          printf ' (%s)' "$overlay_reason" >&2
-        fi
-        if [ -n "$copy_reason" ]; then
-          printf ' [copy fallback: %s]' "$copy_reason" >&2
-        fi
-        printf '\n' >&2
-      fi
-    else
-      if copy_workspace_with_cp "$tmp_workspace" 2>/tmp/jaiph-workspace-cp.err; then
-        RUNTIME_WORKSPACE="$tmp_workspace"
-        printf 'jaiph docker: workspace overlay unavailable; using cp fallback at %s' "$RUNTIME_WORKSPACE" >&2
-        if [ -n "$overlay_reason" ]; then
-          printf ' (%s)' "$overlay_reason" >&2
-        fi
-        printf '\n' >&2
-        overlay_ok=1
-      else
-        cp_reason="$(tr '\n' ' ' </tmp/jaiph-workspace-cp.err | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')"
-        rm -rf "$tmp_workspace"
-        printf 'jaiph docker: workspace overlay unavailable and copy fallbacks are unavailable; container workspace may be incomplete' >&2
-        if [ -n "$overlay_reason" ]; then
-          printf ' (%s)' "$overlay_reason" >&2
-        fi
-        if [ -n "$cp_reason" ]; then
-          printf ' [cp fallback: %s]' "$cp_reason" >&2
-        fi
-        printf '\n' >&2
-      fi
-    fi
-  else
-    printf 'jaiph docker: workspace overlay unavailable and temp workspace allocation failed; container workspace may be incomplete' >&2
-    if [ -n "$overlay_reason" ]; then
-      printf ' (%s)' "$overlay_reason" >&2
-    fi
-    printf '\n' >&2
-  fi
+if [ ! -e /dev/fuse ]; then
+  printf 'E_DOCKER_OVERLAY /dev/fuse not present in container; pass --device /dev/fuse or set JAIPH_DOCKER_NO_OVERLAY=1 to use the copy sandbox path\\n' >&2
+  exit 78
 fi
-
-if [ "$RUNTIME_WORKSPACE" != "$MERGED" ]; then
-  export JAIPH_WORKSPACE="$RUNTIME_WORKSPACE"
-  if [ -n "\${JAIPH_AGENT_TRUSTED_WORKSPACE:-}" ]; then
-    export JAIPH_AGENT_TRUSTED_WORKSPACE="$(rewrite_workspace_path "$JAIPH_AGENT_TRUSTED_WORKSPACE")"
-  fi
-  rewritten_args=()
-  for arg in "$@"; do
-    rewritten_args+=("$(rewrite_workspace_path "$arg")")
-  done
-  cd "$RUNTIME_WORKSPACE"
-  exec "\${rewritten_args[@]}"
+if ! fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
+  reason="$(tr '\\n' ' ' </tmp/jaiph-fuse-overlay.err | sed 's/[[:space:]]\\+/ /g; s/^ //; s/ $//')"
+  printf 'E_DOCKER_OVERLAY fuse-overlayfs mount failed: %s\\n' "$reason" >&2
+  exit 78
 fi
 
 cd "$MERGED"
@@ -466,6 +350,128 @@ export function writeOverlayScript(): string {
   return scriptPath;
 }
 
+// ---------------------------------------------------------------------------
+// Sandbox mode selection + host-side workspace clone
+// ---------------------------------------------------------------------------
+
+/** Selected sandbox primitive for a Docker run. */
+export type SandboxMode = "overlay" | "copy";
+
+/**
+ * Choose the sandbox mode for the upcoming run.
+ *
+ * Heuristic: presence of `/dev/fuse` on the host is a strong proxy for
+ * fuse-overlayfs viability inside the container. Linux dev/CI hosts typically
+ * have it; macOS Docker Desktop typically doesn't expose it. Override with
+ * `JAIPH_DOCKER_NO_OVERLAY=1` to force the host-copy path.
+ */
+export function selectSandboxMode(env: Record<string, string | undefined>): SandboxMode {
+  if (env.JAIPH_DOCKER_NO_OVERLAY === "1" || env.JAIPH_DOCKER_NO_OVERLAY === "true") {
+    return "copy";
+  }
+  return existsSync("/dev/fuse") ? "overlay" : "copy";
+}
+
+/** Run `cp` with the given flags. Returns true on success. */
+function tryCp(flags: string[], src: string, dst: string): { ok: boolean; stderr: string } {
+  const r = spawnSync("cp", [...flags, src, dst], { stdio: ["ignore", "ignore", "pipe"] });
+  return { ok: r.status === 0, stderr: r.stderr?.toString() ?? "" };
+}
+
+/**
+ * Copy a single top-level entry into the sandbox workspace.
+ *
+ * On macOS, prefers `cp -cR` (APFS clonefile, O(1) per file). On any
+ * platform/filesystem where clonefile fails (or on Linux where BSD `-c` isn't
+ * supported), falls back to plain `cp -R` and notes the fallback for the caller
+ * to surface as a one-time warning.
+ */
+function copyEntryWithCloneFallback(
+  src: string,
+  dst: string,
+  state: { cloneAttempted: boolean; cloneSupported: boolean; firstFallbackReason: string | null },
+): void {
+  if (process.platform === "darwin") {
+    if (!state.cloneAttempted) {
+      state.cloneAttempted = true;
+      const r = tryCp(["-cR"], src, dst);
+      if (r.ok) {
+        state.cloneSupported = true;
+        return;
+      }
+      state.firstFallbackReason = r.stderr.trim().split("\n")[0] || "cp -cR failed";
+      const fb = tryCp(["-pR"], src, dst);
+      if (!fb.ok) {
+        throw new Error(`E_DOCKER_SANDBOX_COPY failed to copy ${src} → ${dst}: ${fb.stderr.trim()}`);
+      }
+      return;
+    }
+    if (state.cloneSupported) {
+      const r = tryCp(["-cR"], src, dst);
+      if (r.ok) return;
+    }
+    const fb = tryCp(["-pR"], src, dst);
+    if (!fb.ok) {
+      throw new Error(`E_DOCKER_SANDBOX_COPY failed to copy ${src} → ${dst}: ${fb.stderr.trim()}`);
+    }
+    return;
+  }
+  const r = tryCp(["-pR"], src, dst);
+  if (!r.ok) {
+    throw new Error(`E_DOCKER_SANDBOX_COPY failed to copy ${src} → ${dst}: ${r.stderr.trim()}`);
+  }
+}
+
+/**
+ * Clone the host workspace into a sandbox directory.
+ *
+ * - macOS: tries `cp -cR` (APFS clonefile, O(1)); on failure, falls back to
+ *   `cp -pR` (real copy) with a single stderr warning noting the reason.
+ * - Linux/other: uses `cp -pR` directly. The slow case (no fuse-overlayfs +
+ *   non-COW filesystem) is documented; users on those hosts pay the copy cost.
+ *
+ * Excludes `.jaiph/runs` (mounted separately at `/jaiph/run`) and `.git/objects`
+ * is intentionally NOT excluded — workflows may need git history.
+ */
+export function cloneWorkspaceForSandbox(
+  srcRoot: string,
+  dstRoot: string,
+  warn: (msg: string) => void = (m) => process.stderr.write(`${m}\n`),
+): void {
+  mkdirSync(dstRoot, { recursive: true });
+  const state = { cloneAttempted: false, cloneSupported: false, firstFallbackReason: null as string | null };
+
+  for (const entry of readdirSync(srcRoot, { withFileTypes: true })) {
+    if (entry.name === ".jaiph") continue;
+    copyEntryWithCloneFallback(join(srcRoot, entry.name), join(dstRoot, entry.name), state);
+  }
+
+  const jaiphSrc = join(srcRoot, ".jaiph");
+  if (existsSync(jaiphSrc)) {
+    const jaiphDst = join(dstRoot, ".jaiph");
+    mkdirSync(jaiphDst, { recursive: true });
+    for (const entry of readdirSync(jaiphSrc, { withFileTypes: true })) {
+      if (entry.name === "runs") continue;
+      copyEntryWithCloneFallback(join(jaiphSrc, entry.name), join(jaiphDst, entry.name), state);
+    }
+  }
+
+  if (process.platform === "darwin" && state.cloneAttempted && !state.cloneSupported) {
+    warn(
+      `jaiph docker: clonefile (cp -cR) unavailable on this filesystem; using plain copy ` +
+      `(${state.firstFallbackReason ?? "unknown reason"}). Workspace clone may be slow for large trees.`,
+    );
+  }
+}
+
+/** Allocate a fresh sandbox workspace directory adjacent to the runs root. */
+export function allocateSandboxWorkspaceDir(runsRoot: string): string {
+  const id = randomBytes(4).toString("hex");
+  const dir = join(runsRoot, `.sandbox-${id}`);
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
+
 // ---------------------------------------------------------------------------
 // Docker command builder
 // ---------------------------------------------------------------------------
@@ -479,6 +485,18 @@ export interface DockerSpawnOptions {
   runArgs: string[];
   env: Record<string, string | undefined>;
   isTTY: boolean;
+  /**
+   * How to make the workspace appear writable inside the container.
+   *  - "overlay": bind workspace ro, set up fuse-overlayfs in-container.
+   *  - "copy":    pre-clone workspace on host, bind the clone rw.
+   * Defaults to `selectSandboxMode(env)` when omitted.
+   */
+  sandboxMode?: SandboxMode;
+  /**
+   * Required when `sandboxMode === "copy"`: the host path of the cloned
+   * workspace to bind at `/jaiph/workspace`. Caller owns its lifecycle.
+   */
+  sandboxWorkspaceDir?: string;
 }
 
 export const CONTAINER_WORKSPACE = "/jaiph/workspace";
@@ -555,26 +573,38 @@ export function overlayMountPath(containerPath: string): string {
 /**
  * Build the `docker run --rm` argument list.
  *
- * Mounts:
- *  1. workspace → /jaiph/workspace-ro:ro  (overlay lower layer / copy source)
- *  2. sandboxRunDir → /jaiph/run:rw       (single run artifacts)
- *
- * The image already contains a `/jaiph/workspace` directory used as the overlay
- * merge target. When overlay mounts are unavailable, `overlay-run.sh` falls back
- * to a writable per-run workspace copy under `/tmp`. `/jaiph/run` is outside the
- * overlay, so run artifacts still persist to the host mount.
+ * Two sandbox shapes:
+ *  - "overlay": workspace bind-mounts ro at /jaiph/workspace-ro; entrypoint
+ *    script sets up fuse-overlayfs at /jaiph/workspace. Requires SYS_ADMIN
+ *    and /dev/fuse. Run artifacts mount at /jaiph/run (outside the overlay).
+ *  - "copy": host pre-clones workspace to `opts.sandboxWorkspaceDir`; that
+ *    dir bind-mounts rw at /jaiph/workspace. No overlay script, no fuse,
+ *    no SYS_ADMIN. Run artifacts mount at /jaiph/run as before.
  *
  * The container runs `jaiph run --raw <file>` using its own installed jaiph.
+ *
+ * `overlayScriptPath` is required for "overlay" mode and ignored for "copy".
  */
-export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: string): string[] {
+export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: string): string[] {
+  const mode: SandboxMode = opts.sandboxMode ?? selectSandboxMode(opts.env);
+  if (mode === "overlay" && !overlayScriptPath) {
+    throw new Error("buildDockerArgs: overlay mode requires overlayScriptPath");
+  }
+  if (mode === "copy" && !opts.sandboxWorkspaceDir) {
+    throw new Error("buildDockerArgs: copy mode requires sandboxWorkspaceDir");
+  }
+
   const args: string[] = ["run", "--rm"];
 
-  // Least-privilege: drop all capabilities, re-add only SYS_ADMIN for fuse-overlayfs
   args.push("--cap-drop", "ALL");
-  args.push("--cap-add", "SYS_ADMIN");
+  if (mode === "overlay") {
+    args.push("--cap-add", "SYS_ADMIN");
+  }
   args.push("--security-opt", "no-new-privileges");
 
-  args.push("--device", "/dev/fuse");
+  if (mode === "overlay") {
+    args.push("--device", "/dev/fuse");
+  }
 
   if (process.platform === "linux") {
     try {
@@ -590,22 +620,42 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str
     args.push("--network", opts.config.network);
   }
 
-  // Workspace inputs: mounted only at the overlay lower-layer path.
-  for (const mount of opts.config.mounts) {
-    const hostAbs = resolve(opts.workspaceRoot, mount.hostPath);
+  if (mode === "overlay") {
+    // Workspace inputs land at the overlay lower-layer path; overlay script merges them rw.
+    for (const mount of opts.config.mounts) {
+      const hostAbs = resolve(opts.workspaceRoot, mount.hostPath);
+      validateMountHostPath(hostAbs);
+      args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`);
+    }
+  } else {
+    // Pre-cloned workspace mounts rw directly at /jaiph/workspace.
+    const hostAbs = resolve(opts.sandboxWorkspaceDir!);
     validateMountHostPath(hostAbs);
-    args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`);
+    args.push("-v", `${hostAbs}:${CONTAINER_WORKSPACE}:rw`);
+    // Honor any additional sub-mounts (e.g. "config:ro") relative to the cloned
+    // workspace, so users can still pin parts as ro inside the container.
+    for (const mount of opts.config.mounts) {
+      if (mount.containerPath === CONTAINER_WORKSPACE) continue;
+      const subRel = relative(CONTAINER_WORKSPACE, mount.containerPath);
+      if (subRel.startsWith("..")) {
+        // External (non-workspace) mounts: bind the original host path through.
+        const extAbs = resolve(opts.workspaceRoot, mount.hostPath);
+        validateMountHostPath(extAbs);
+        args.push("-v", `${extAbs}:${mount.containerPath}:${mount.mode}`);
+      } else {
+        const subAbs = join(hostAbs, subRel);
+        args.push("-v", `${subAbs}:${mount.containerPath}:${mount.mode}`);
+      }
+    }
   }
 
-  // Single run directory: rw mount outside the overlay
   args.push("-v", `${opts.sandboxRunDir}:${CONTAINER_RUN_DIR}:rw`);
 
-  // Overlay entrypoint script (runtime-generated, mounted ro)
-  args.push("-v", `${overlayScriptPath}:/jaiph/overlay-run.sh:ro`);
+  if (mode === "overlay") {
+    args.push("-v", `${overlayScriptPath}:/jaiph/overlay-run.sh:ro`);
+  }
 
-  // Environment
   const containerEnv = remapDockerEnv(opts.env);
-
   for (const [key, value] of Object.entries(containerEnv)) {
     if (value === undefined) continue;
     if (isEnvDenied(key)) continue;
@@ -620,14 +670,21 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str
   args.push("-w", CONTAINER_WORKSPACE);
   args.push(opts.config.image);
 
-  // Command: overlay wrapper → jaiph run --raw
   const relSource = relative(opts.workspaceRoot, opts.sourceAbs);
-  args.push(
-    "/jaiph/overlay-run.sh",
-    "jaiph", "run", "--raw",
-    `${CONTAINER_WORKSPACE}/${relSource}`,
-    ...opts.runArgs,
-  );
+  if (mode === "overlay") {
+    args.push(
+      "/jaiph/overlay-run.sh",
+      "jaiph", "run", "--raw",
+      `${CONTAINER_WORKSPACE}/${relSource}`,
+      ...opts.runArgs,
+    );
+  } else {
+    args.push(
+      "jaiph", "run", "--raw",
+      `${CONTAINER_WORKSPACE}/${relSource}`,
+      ...opts.runArgs,
+    );
+  }
 
   return args;
 }
@@ -640,8 +697,14 @@ export interface DockerSpawnResult {
   child: ChildProcess;
   /** Host directory mounted at /jaiph/run — scan for artifacts after exit. */
   sandboxRunDir: string;
-  /** Temp directory containing overlay-run.sh — cleaned up after exit. */
-  overlayScriptDir: string;
+  /** Selected sandbox primitive for this run. */
+  sandboxMode: SandboxMode;
+  /** Temp directory containing overlay-run.sh — cleaned up after exit (overlay mode). */
+  overlayScriptDir?: string;
+  /** Pre-cloned workspace dir mounted rw — removed on cleanup unless kept (copy mode). */
+  sandboxWorkspaceDir?: string;
+  /** When true, cleanup leaves `sandboxWorkspaceDir` on disk for debugging. */
+  keepSandboxWorkspace: boolean;
   timeoutTimer?: NodeJS.Timeout;
 }
 
@@ -650,15 +713,35 @@ export interface DockerSpawnResult {
  *
  * The container runs `jaiph run --raw <file>` using its own installed jaiph.
  * Events flow via stderr; stdout carries workflow output.
+ *
+ * Sandbox mode is picked from `opts.sandboxMode` if set, otherwise
+ * `selectSandboxMode(opts.env)`. In "copy" mode the workspace is cloned to a
+ * fresh `<runsRoot>/.sandbox-<id>/` directory (or the provided
+ * `opts.sandboxWorkspaceDir`) before launch.
  */
 export function spawnDockerProcess(opts: DockerSpawnOptions): DockerSpawnResult {
   checkDockerAvailable();
   const resolvedImage = resolveImage(opts.config);
   opts = { ...opts, config: { ...opts.config, image: resolvedImage } };
 
+  const mode: SandboxMode = opts.sandboxMode ?? selectSandboxMode(opts.env);
   mkdirSync(opts.sandboxRunDir, { recursive: true });
-  const overlayScriptPath = writeOverlayScript();
-  const overlayScriptDir = dirname(overlayScriptPath);
+
+  let overlayScriptPath: string | undefined;
+  let overlayScriptDir: string | undefined;
+  let sandboxWorkspaceDir: string | undefined;
+  const keepSandboxWorkspace =
+    opts.env.JAIPH_DOCKER_KEEP_SANDBOX === "1" || opts.env.JAIPH_DOCKER_KEEP_SANDBOX === "true";
+
+  if (mode === "overlay") {
+    overlayScriptPath = writeOverlayScript();
+    overlayScriptDir = dirname(overlayScriptPath);
+  } else {
+    sandboxWorkspaceDir = opts.sandboxWorkspaceDir ?? allocateSandboxWorkspaceDir(opts.sandboxRunDir);
+    cloneWorkspaceForSandbox(opts.workspaceRoot, sandboxWorkspaceDir);
+  }
+
+  opts = { ...opts, sandboxMode: mode, sandboxWorkspaceDir };
   const dockerArgs = buildDockerArgs(opts, overlayScriptPath);
 
   const child = spawn("docker", dockerArgs, {
@@ -685,20 +768,40 @@ export function spawnDockerProcess(opts: DockerSpawnOptions): DockerSpawnResult
     }, opts.config.timeout * 1000);
   }
 
-  return { child, sandboxRunDir: opts.sandboxRunDir, overlayScriptDir, timeoutTimer };
+  return {
+    child,
+    sandboxRunDir: opts.sandboxRunDir,
+    sandboxMode: mode,
+    overlayScriptDir,
+    sandboxWorkspaceDir,
+    keepSandboxWorkspace,
+    timeoutTimer,
+  };
 }
 
 /**
  * Clean up Docker resources after execution.
+ *
+ * Removes the overlay script tempdir (overlay mode) and the cloned workspace
+ * (copy mode), unless `JAIPH_DOCKER_KEEP_SANDBOX=1` was set.
  */
 export function cleanupDocker(result: DockerSpawnResult): void {
   if (result.timeoutTimer) {
     clearTimeout(result.timeoutTimer);
   }
-  try {
-    rmSync(result.overlayScriptDir, { recursive: true, force: true });
-  } catch {
-    // Best-effort cleanup
+  if (result.overlayScriptDir) {
+    try {
+      rmSync(result.overlayScriptDir, { recursive: true, force: true });
+    } catch {
+      // Best-effort cleanup
+    }
+  }
+  if (result.sandboxWorkspaceDir && !result.keepSandboxWorkspace) {
+    try {
+      rmSync(result.sandboxWorkspaceDir, { recursive: true, force: true });
+    } catch {
+      // Best-effort cleanup
+    }
   }
 }
 

From 7d1255b836db489d19b33cfde648e1aeb57dd4e6 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 06:57:58 +0200
Subject: [PATCH 24/38] ci(docker): publish runtime image for amd64 and arm64

Add Docker Buildx and multi-platform build so GHCR tags include
linux/arm64 for Apple Silicon hosts, alongside linux/amd64.

Made-with: Cursor
---
 .github/workflows/docker-publish.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index ab7c4b01..58821143 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -28,6 +28,9 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Image tags and jaiph ref
         id: meta
         run: |
@@ -46,6 +49,7 @@ jobs:
           context: runtime
           file: runtime/Dockerfile
           push: true
+          platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           build-args: |
             JAIPH_REPO_REF=${{ steps.meta.outputs.jaiph_ref }}

From be2de569c04a78bd7be62a83c50f5287df226c6f Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 07:15:31 +0200
Subject: [PATCH 25/38] CI: fix three failures from the docker sandbox rework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. landing-page.spec.ts (chromium) — docs/index.html had stale expected
   output for the agent_inbox sample with the old escaped-quote rendering
   ("\"Found 3 issues\"" / "Critical issue: \"Summary: ...\""). The
   runtime no longer emits those escapes (see prior CHANGELOG entry on
   formatNamedParamsForDisplay). Updated the page sample to match the
   current clean rendering — the test pulls expected text from the page
   verbatim, so this is the source of truth.

2. macos runner — 104_run_async fanout flake. Async branches complete
   in non-deterministic order; the previous slow rsync sandbox happened
   to serialize timing, masking the race. With the fast clone path
   (cp -cR) the race surfaces. Fix in e2e/lib/common.sh: extend
   normalize_output with a perl pass that sorts contiguous "async-progress"
   lines (lines starting with a leading space + subscript marker
   ₁..₉, UTF-8 bytes E2 82 81..89). Both actual and expected get the
   same canonical order, so strict equality still works while the
   inter-branch race is normalized away. No per-test changes needed —
   verified against fanout, sibling_depth, circled, nested_async,
   async_interleave and 78_lang_redesign_constructs.

3. ubuntu runner — 72_docker_run_artifacts: fuse-overlayfs mount fails
   with "Permission denied" even with SYS_ADMIN + /dev/fuse. Root cause
   is the default Docker AppArmor profile shipped on Ubuntu 22.04+ /
   GitHub Actions runners, which denies fuse mounts in containers.
   Documented workaround: --security-opt apparmor=unconfined. Added it
   to overlay mode args, Linux-only (macOS Docker Desktop has no
   AppArmor and rejects unknown security-opts). Tests cover both that
   the flag is added in overlay mode on Linux and absent in copy mode.
   docs/sandboxing.md updated with the rationale and remaining failure
   modes (rootless docker, locked-down kernels) where the operator
   still needs JAIPH_DOCKER_NO_OVERLAY=1.

Made-with: Cursor
---
 docs/index.html            |  6 +++---
 docs/sandboxing.md         |  4 ++--
 e2e/lib/common.sh          | 20 +++++++++++++++++++-
 src/runtime/docker.test.ts | 24 ++++++++++++++++++++++++
 src/runtime/docker.ts      |  9 +++++++++
 5 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/docs/index.html b/docs/index.html
index fba5fb85..020d3230 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -360,12 +360,12 @@ <h2 id="samples">Samples</h2>
   <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> scanner
   <span class="run-marker">·</span>   <span class="run-marker">ℹ</span> Scanning for issues...
   <span class="run-pass">✓</span> <span class="run-time">workflow scanner (0s)</span>
-  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> analyst <span class="run-params">(message="\"Found 3 issues in auth module\"", chan="findings", sender="scanner")</span>
+  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> analyst <span class="run-params">(message="Found 3 issues in auth module", chan="findings", sender="scanner")</span>
   <span class="run-marker">·</span>   <span class="run-marker">ℹ</span> Analyzing message from scanner on channel findings...
   <span class="run-pass">✓</span> <span class="run-time">workflow analyst (0s)</span>
-  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> reviewer <span class="run-params">(message="\"Summary: \"Found 3 issues in aut...", chan="report", sender="analyst")</span>
+  <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> reviewer <span class="run-params">(message="Summary: Found 3 issues in auth ...", chan="report", sender="analyst")</span>
   <span class="run-marker">·</span>   <span class="run-marker">ℹ</span> Reviewing message from analyst on channel report...
-  <span class="run-marker">·</span>   <span class="run-fail">! Critical issue: "Summary: "Found 3 issues in auth module""</span>
+  <span class="run-marker">·</span>   <span class="run-fail">! Critical issue: Summary: Found 3 issues in auth module</span>
   <span class="run-pass">✓</span> <span class="run-time">workflow reviewer (0s)</span>
 
 <span class="run-pass">✓ PASS</span> <span class="run-keyword">workflow</span> default <span class="run-time">(0s)</span></code></pre>
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index b8e39f28..949ff263 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -156,7 +156,7 @@ The working directory is `/jaiph/workspace`. In overlay mode the host CLI genera
 
 **Events** -- The container's jaiph runs in `--raw` mode: it spawns the runtime with inherited stdio, so `__JAIPH_EVENT__` JSON flows directly to the container's stderr. The host CLI reads Docker's stderr pipe and renders the progress tree. stdout carries plain script output. `STEP_END` events embed `out_content` (and `err_content` on failure) so consumers do not need host paths to step artifact files.
 
-**Sandbox primitive (overlay vs. copy)** -- Selected at launch time. If `/dev/fuse` exists on the host, the CLI uses **overlay mode**: the `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. If `fuse-overlayfs` is missing from the image at runtime, the entrypoint exits with `E_DOCKER_OVERLAY` -- there is no in-container fallback. Set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to opt into copy mode instead.
+**Sandbox primitive (overlay vs. copy)** -- Selected at launch time. If `/dev/fuse` exists on the host, the CLI uses **overlay mode**: the `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. On Linux hosts, the overlay container is also launched with `--security-opt apparmor=unconfined` because the default Docker AppArmor profile (active on Ubuntu 22.04+, GitHub Actions runners, and similar) denies fuse mounts even when `SYS_ADMIN` and `/dev/fuse` are present. If `fuse-overlayfs` is missing from the image or the mount still fails at runtime, the entrypoint exits with `E_DOCKER_OVERLAY` -- there is no in-container fallback. Set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to opt into copy mode instead.
 
 If `/dev/fuse` is missing on the host, the CLI uses **copy mode**: before launching the container it clones the workspace into `<runs-root>/.sandbox-<id>/` (excluding `.jaiph/runs`) using `cp -cR` on macOS (APFS clonefile, O(1) per file) or `cp -pR` elsewhere (a real copy; a single stderr warning is printed when the fast path is unavailable). The clone is bind-mounted rw at `/jaiph/workspace`. After the container exits the clone is removed unless `JAIPH_DOCKER_KEEP_SANDBOX=1` is set, in which case the path is left in place and printed to stderr for debugging.
 
@@ -183,7 +183,7 @@ Docker-related errors use `E_DOCKER_*` codes for programmatic detection:
 | `E_DOCKER_BUILD` | `docker build` fails when using helpers that build from a Dockerfile | Not used during normal `jaiph run` image resolution. |
 | `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. |
 | `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. |
-| `E_DOCKER_OVERLAY` | Overlay mode selected but `fuse-overlayfs` is missing from the image or the mount fails inside the container | Container exits with code 78. Use the official runtime image, install `fuse-overlayfs` in your custom image, or set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to switch to copy mode. |
+| `E_DOCKER_OVERLAY` | Overlay mode selected but `fuse-overlayfs` is missing from the image or the mount fails inside the container | Container exits with code 78. Use the official runtime image, install `fuse-overlayfs` in your custom image, or set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to switch to copy mode. The CLI already passes `--security-opt apparmor=unconfined` on Linux to defeat the default AppArmor fuse-deny; remaining failures usually mean the host kernel itself blocks fuse mounts (rootless docker without the right user-namespace setup, locked-down kernel, etc.). |
 | `E_DOCKER_SANDBOX_COPY` | Copy mode failed to clone the host workspace (`cp` returned non-zero) | Run exits before container launch. Inspect the path printed in the error. |
 | `E_VALIDATE_MOUNT` | Mount targets a denied host path (`/`, `/proc`, docker socket, etc.) | Run exits before container launch. |
 | `E_TIMEOUT` | Container exceeds `runtime.docker_timeout` seconds | Container receives SIGTERM, then SIGKILL after 5s grace period. |
diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh
index d8c157b3..cc2b7fe6 100644
--- a/e2e/lib/common.sh
+++ b/e2e/lib/common.sh
@@ -74,6 +74,12 @@ e2e::assert_equals() {
 e2e::normalize_output() {
   local input="$1"
   # Strip ANSI and normalize timing values for stable assertions.
+  # Final perl step canonicalizes the order of contiguous "async-progress"
+  # lines (lines starting with one or more spaces followed by a subscript
+  # marker ₁..₉, UTF-8 bytes E2 82 81..89). Async branches that run in
+  # parallel complete in non-deterministic order; sorting both actual and
+  # expected with the same stable order makes strict equality usable while
+  # still asserting that the same set of progress lines was emitted.
   printf "%s" "${input}" \
     | sed -E $'s/\x1B\\[[0-9;]*[A-Za-z]//g' \
     | sed -E 's/\(([0-9]+(\.[0-9]+)?s|[0-9]+m [0-9]+s)\)/(<time>)/g' \
@@ -84,7 +90,19 @@ e2e::normalize_output() {
     | sed -E 's/^( *)(cursor-agent|printf %s) .*$/\1<agent-command>/g' \
     | sed -E 's/\(1="\/[^"]*"/(1="<script-path>"/g' \
     | sed -E 's/[[:space:]]+$//g' \
-    | perl -0777 -pe 's/([^\n])\n(✓ PASS)/$1\n\n$2/g'
+    | perl -0777 -pe 's/([^\n])\n(✓ PASS)/$1\n\n$2/g' \
+    | perl -e '
+        use strict; use warnings;
+        binmode STDIN;
+        binmode STDOUT;
+        my @buf;
+        sub flush { print join("", sort @buf); @buf = (); }
+        while (my $line = <STDIN>) {
+          if ($line =~ /^ +\xe2\x82[\x81-\x89]/) { push @buf, $line; }
+          else { flush(); print $line; }
+        }
+        flush();
+      '
 }
 
 e2e::assert_output_equals() {
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index b3196ebc..51dbe90d 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -631,6 +631,30 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
 
+test("buildDockerArgs: overlay mode adds --security-opt apparmor=unconfined on Linux to allow fuse mounts", () => {
+  if (process.platform !== "linux") return;
+  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+  const secOptIndices = args
+    .map((v, i) => (v === "--security-opt" ? i : -1))
+    .filter((i) => i >= 0);
+  const values = secOptIndices.map((i) => args[i + 1]);
+  assert.ok(values.includes("apparmor=unconfined"), "apparmor=unconfined present in overlay mode");
+});
+
+test("buildDockerArgs: copy mode does not add --security-opt apparmor=unconfined", () => {
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const args = buildDockerArgs(copyOpts(cloneDir));
+    const secOptIndices = args
+      .map((v, i) => (v === "--security-opt" ? i : -1))
+      .filter((i) => i >= 0);
+    const values = secOptIndices.map((i) => args[i + 1]);
+    assert.ok(!values.includes("apparmor=unconfined"), "no apparmor flag needed in copy mode");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
 // ---------------------------------------------------------------------------
 // buildDockerArgs: copy-mode sandbox (host pre-clones workspace, mounts rw)
 // ---------------------------------------------------------------------------
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 417927ad..4451b348 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -604,6 +604,15 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
 
   if (mode === "overlay") {
     args.push("--device", "/dev/fuse");
+    // Many Linux hosts (Ubuntu 22.04+, GitHub Actions runners, etc.) ship a
+    // default AppArmor profile that denies fuse mounts inside containers
+    // even when SYS_ADMIN + /dev/fuse are present. Unconfining apparmor for
+    // this single container restores the documented fuse-overlayfs
+    // behavior. Linux-only: macOS Docker Desktop has no AppArmor and
+    // rejects unknown security-opts on some versions.
+    if (process.platform === "linux") {
+      args.push("--security-opt", "apparmor=unconfined");
+    }
   }
 
   if (process.platform === "linux") {

From e940ada4d20e2095241dc3901b175503ee576ba5 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 07:23:11 +0200
Subject: [PATCH 26/38] feat(cli): show Docker sandbox hint on running banner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Print a dim parenthetical after the .jh name: (no sandbox), local Docker
  detail (fusefs vs tmp dir), or (Docker sandbox, …) when CI=true/1 to avoid
  host-dependent snapshots.
- Resolve runtime env before the banner so sandbox mode matches spawnDockerProcess.
- E2E: normalize_output strips the parenthetical so existing expected stdout
  blocks stay stable.
- Docs: landing-page run samples use run-banner-meta for the gray suffix.

Made-with: Cursor
---
 docs/assets/css/style.css   |  4 +++
 docs/index.html             | 10 +++---
 e2e/lib/common.sh           |  1 +
 src/cli/commands/run.ts     | 25 ++++++++++++---
 src/cli/run/display.test.ts | 64 ++++++++++++++++++++++++++++++++++++-
 src/cli/run/display.ts      | 28 ++++++++++++++++
 6 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/docs/assets/css/style.css b/docs/assets/css/style.css
index d66aebaa..2b3cc45b 100644
--- a/docs/assets/css/style.css
+++ b/docs/assets/css/style.css
@@ -574,6 +574,10 @@ pre code .code-line::before {
     user-select: none;
 }
 
+.jaiph-run .run-banner-meta {
+    color: var(--muted);
+}
+
 .jaiph-run .run-command {
     color: var(--muted);
     font-weight: 600;
diff --git a/docs/index.html b/docs/index.html
index 020d3230..73d3f6de 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -183,7 +183,7 @@ <h2 id="samples">Samples</h2>
                     <p>Running the workflow:</p>
                     <pre><code class="jaiph-run" data-sample-output="success"><span class="run-command">➜  ./say_hello.jh Jakub</span>
 
-Jaiph: Running say_hello.jh
+Jaiph: Running say_hello.jh<span class="run-banner-meta"> (Docker sandbox, fusefs)</span>
 
 <span class="run-keyword">workflow</span> default <span class="run-params">(name="Jakub")</span>
   <span class="run-marker">▸</span> <span class="run-keyword">rule</span> name_was_provided <span class="run-params">(name="Jakub")</span>
@@ -198,7 +198,7 @@ <h2 id="samples">Samples</h2>
                     <p>When you don't provide the name parameter, the workflow fails:</p>
                     <pre><code class="jaiph-run" data-sample-output="failure"><span class="run-command">➜  ./say_hello.jh</span>
 
-Jaiph: Running say_hello.jh
+Jaiph: Running say_hello.jh<span class="run-banner-meta"> (Docker sandbox, fusefs)</span>
 
 <span class="run-keyword">workflow</span> default
   <span class="run-marker">▸</span> <span class="run-keyword">rule</span> name_was_provided
@@ -299,7 +299,7 @@ <h2 id="samples">Samples</h2>
                     </p>
                     <pre><code class="jaiph-run" data-sample-output="run"><span class="run-command">➜  ./recover_loop.jh</span>
 
-Jaiph: Running recover_loop.jh
+Jaiph: Running recover_loop.jh<span class="run-banner-meta"> (Docker sandbox, fusefs)</span>
 
 <span class="run-keyword">workflow</span> default
   <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> check
@@ -354,7 +354,7 @@ <h2 id="samples">Samples</h2>
                     <p>Running the workflow:</p>
                     <pre><code class="jaiph-run" data-sample-output="run"><span class="run-command">➜  ./agent_inbox.jh</span>
 
-Jaiph: Running agent_inbox.jh
+Jaiph: Running agent_inbox.jh<span class="run-banner-meta"> (Docker sandbox, fusefs)</span>
 
 <span class="run-keyword">workflow</span> default
   <span class="run-marker">▸</span> <span class="run-keyword">workflow</span> scanner
@@ -401,7 +401,7 @@ <h2 id="samples">Samples</h2>
                     <p>Running the workflow:</p>
                     <pre><code class="jaiph-run" data-sample-output="run"><span class="run-command">➜  ./async.jh</span>
 
-Jaiph: Running async.jh
+Jaiph: Running async.jh<span class="run-banner-meta"> (Docker sandbox, fusefs)</span>
 
 <span class="run-keyword">workflow</span> default
  <span class="run-marker">₁</span><span class="run-marker">▸</span> <span class="run-keyword">workflow</span> cursor_say_hello
diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh
index cc2b7fe6..5bf7f581 100644
--- a/e2e/lib/common.sh
+++ b/e2e/lib/common.sh
@@ -82,6 +82,7 @@ e2e::normalize_output() {
   # still asserting that the same set of progress lines was emitted.
   printf "%s" "${input}" \
     | sed -E $'s/\x1B\\[[0-9;]*[A-Za-z]//g' \
+    | sed -E 's/^(Jaiph: Running [^ ]+) \(.+\)$/\1/' \
     | sed -E 's/\(([0-9]+(\.[0-9]+)?s|[0-9]+m [0-9]+s)\)/(<time>)/g' \
     | sed -E 's/\(([0-9]+(\.[0-9]+)?s|[0-9]+m [0-9]+s) failed\)/(<time> failed)/g' \
     | sed -E 's/✓ ([0-9]+)(\.[0-9]+)?s/✓ <time>/g' \
diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts
index c1ab63ee..a05a04b6 100644
--- a/src/cli/commands/run.ts
+++ b/src/cli/commands/run.ts
@@ -31,6 +31,8 @@ import {
   spawnDockerProcess,
   cleanupDocker,
   resolveDockerHostRunsRoot,
+  selectSandboxMode,
+  type SandboxMode,
 } from "../../runtime/docker";
 import {
   styleKeywordLabel,
@@ -39,7 +41,7 @@ import {
 } from "../run/progress";
 import { loadMergedHooks, registerHooksSubscriber } from "../run/hooks";
 import { resolveRuntimeEnv } from "../run/env";
-import { colorize } from "../run/display";
+import { colorize, formatJaiphRunningBannerLines } from "../run/display";
 import { createRunEmitter } from "../run/emitter";
 import {
   createStderrParser,
@@ -83,10 +85,21 @@ export async function runWorkflow(rest: string[]): Promise<number> {
     const isTTY = !!process.stdout.isTTY;
     const startedAt = Date.now();
 
-    writeBanner(mod, inputAbs, runArgs, colorEnabled, isTTY, startedAt);
-
     const runtimeEnv = resolveRuntimeEnv(effectiveConfig, workspaceRoot, inputAbs);
     runtimeEnv.JAIPH_SOURCE_ABS = inputAbs;
+    const dockerConfigForBanner = resolveDockerConfig(mod.metadata?.runtime, runtimeEnv);
+    const sandboxModeForBanner = dockerConfigForBanner.enabled ? selectSandboxMode(runtimeEnv) : null;
+
+    writeBanner(
+      mod,
+      inputAbs,
+      runArgs,
+      colorEnabled,
+      isTTY,
+      startedAt,
+      dockerConfigForBanner.enabled,
+      sandboxModeForBanner,
+    );
     const { scriptsDir } = buildScripts(inputAbs, outDir, workspaceRoot);
     runtimeEnv.JAIPH_SCRIPTS = scriptsDir;
     const metaFile = join(outDir, `.jaiph-run-meta-${Date.now()}-${process.pid}.txt`);
@@ -220,9 +233,13 @@ function writeBanner(
   colorEnabled: boolean,
   isTTY: boolean,
   startedAt: number,
+  dockerEnabled: boolean,
+  sandboxMode: SandboxMode | null,
 ): void {
   const rootLabel = "workflow default";
-  process.stdout.write(`\nJaiph: Running ${basename(inputAbs)}\n\n`);
+  process.stdout.write(
+    formatJaiphRunningBannerLines(basename(inputAbs), dockerEnabled, sandboxMode, colorEnabled),
+  );
   const defaultWf = mod.workflows.find((w) => w.name === "default");
   const rootParamsSuffix =
     runArgs.length > 0
diff --git a/src/cli/run/display.test.ts b/src/cli/run/display.test.ts
index 187f74ab..6470260f 100644
--- a/src/cli/run/display.test.ts
+++ b/src/cli/run/display.test.ts
@@ -1,6 +1,68 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { colorize, formatCompletedLine, formatHeartbeatLine, formatStartLine, sanitizeMultilineLogForTerminal } from "./display";
+import {
+  colorize,
+  formatCompletedLine,
+  formatHeartbeatLine,
+  formatJaiphRunningBannerLines,
+  formatStartLine,
+  sanitizeMultilineLogForTerminal,
+} from "./display";
+
+// === formatJaiphRunningBannerLines ===
+
+test("formatJaiphRunningBannerLines: no Docker shows no sandbox (no color)", () => {
+  const s = formatJaiphRunningBannerLines("say_hello.jh", false, null, false);
+  assert.equal(s, "\nJaiph: Running say_hello.jh (no sandbox)\n\n");
+});
+
+test("formatJaiphRunningBannerLines: Docker overlay shows fusefs locally (no color)", () => {
+  const prev = process.env.CI;
+  delete process.env.CI;
+  try {
+    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
+    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, fusefs)\n\n");
+  } finally {
+    if (prev === undefined) delete process.env.CI;
+    else process.env.CI = prev;
+  }
+});
+
+test("formatJaiphRunningBannerLines: Docker copy shows tmp dir locally (no color)", () => {
+  const prev = process.env.CI;
+  delete process.env.CI;
+  try {
+    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "copy", false);
+    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, tmp dir)\n\n");
+  } finally {
+    if (prev === undefined) delete process.env.CI;
+    else process.env.CI = prev;
+  }
+});
+
+test("formatJaiphRunningBannerLines: CI obfuscates Docker sandbox detail", () => {
+  const prev = process.env.CI;
+  process.env.CI = "true";
+  try {
+    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
+    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, …)\n\n");
+  } finally {
+    if (prev === undefined) delete process.env.CI;
+    else process.env.CI = prev;
+  }
+});
+
+test("formatJaiphRunningBannerLines: dim ANSI wraps parenthetical when color on", () => {
+  const prev = process.env.CI;
+  delete process.env.CI;
+  try {
+    const s = formatJaiphRunningBannerLines("x.jh", false, null, true);
+    assert.ok(s.includes("\u001b[2m (no sandbox)\u001b[0m"));
+  } finally {
+    if (prev === undefined) delete process.env.CI;
+    else process.env.CI = prev;
+  }
+});
 
 // === colorize ===
 
diff --git a/src/cli/run/display.ts b/src/cli/run/display.ts
index 7f7af22a..62e40e56 100644
--- a/src/cli/run/display.ts
+++ b/src/cli/run/display.ts
@@ -1,8 +1,36 @@
 import { formatNamedParamsForDisplay, isInternalParamValue } from "../commands/format-params.js";
+import type { SandboxMode } from "../../runtime/docker";
 
 const PROMPT_PREVIEW_MAX = 24;
 const PROMPT_ARGS_DISPLAY_MAX = 96;
 
+function isCiEnvironment(): boolean {
+  const c = process.env.CI;
+  return c === "true" || c === "1";
+}
+
+/**
+ * First stdout lines for `jaiph run`: file name plus a dim parenthetical describing
+ * Docker sandbox mode. In CI, the host-dependent detail (fuse vs copy) is redacted.
+ */
+export function formatJaiphRunningBannerLines(
+  fileBasename: string,
+  dockerEnabled: boolean,
+  sandboxMode: SandboxMode | null,
+  colorEnabled: boolean,
+): string {
+  let parenInner: string;
+  if (!dockerEnabled) {
+    parenInner = "no sandbox";
+  } else if (isCiEnvironment()) {
+    parenInner = "Docker sandbox, …";
+  } else {
+    parenInner = sandboxMode === "overlay" ? "Docker sandbox, fusefs" : "Docker sandbox, tmp dir";
+  }
+  const dimParen = colorize(` (${parenInner})`, "dim", colorEnabled);
+  return `\nJaiph: Running ${fileBasename}${dimParen}\n\n`;
+}
+
 export function colorize(
   text: string,
   code: "dim" | "bold" | "green" | "red",

From 37e668771947bc9cef5c6cb463501cf076b38224 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 07:26:45 +0200
Subject: [PATCH 27/38] ci: publish Docker image only after full CI succeeds

Move GHCR runtime build from a standalone workflow into CI with needs on
test, e2e, docs-local, and e2e-wsl. Same triggers: nightly branch and v*
tags. Remove docker-publish.yml to avoid duplicate pushes.

Made-with: Cursor
---
 .github/workflows/ci.yml             | 53 ++++++++++++++++++++++++
 .github/workflows/docker-publish.yml | 60 ----------------------------
 docs/sandboxing.md                   |  2 +-
 3 files changed, 54 insertions(+), 61 deletions(-)
 delete mode 100644 .github/workflows/docker-publish.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 289bfaee..1637bb51 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -186,3 +186,56 @@ jobs:
         shell: pwsh
         run: |
           Write-Host "No WSL distro found on this runner image; skipping WSL E2E."
+
+  docker-publish:
+    name: Publish Docker runtime image
+    needs: [test, e2e, docs-local, e2e-wsl]
+    if: github.ref == 'refs/heads/nightly' || startsWith(github.ref, 'refs/tags/v')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    env:
+      REGISTRY: ghcr.io
+      IMAGE_NAME: jaiphlang/jaiph-runtime
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Image tags and jaiph ref
+        id: meta
+        run: |
+          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
+            VERSION="${GITHUB_REF_NAME#v}"
+            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${VERSION},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> "$GITHUB_OUTPUT"
+            echo "jaiph_ref=v${VERSION}" >> "$GITHUB_OUTPUT"
+          else
+            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly" >> "$GITHUB_OUTPUT"
+            echo "jaiph_ref=nightly" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: runtime
+          file: runtime/Dockerfile
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            JAIPH_REPO_REF=${{ steps.meta.outputs.jaiph_ref }}
+
+      - name: Verify pushed image contains jaiph
+        run: |
+          TAG="$(echo '${{ steps.meta.outputs.tags }}' | cut -d',' -f1)"
+          docker run --rm --entrypoint sh "${TAG}" -lc "command -v jaiph && jaiph --version"
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
deleted file mode 100644
index 58821143..00000000
--- a/.github/workflows/docker-publish.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: Publish Docker runtime image
-
-on:
-  push:
-    branches: [nightly]
-    tags: ["v*"]
-
-permissions:
-  contents: read
-  packages: write
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: jaiphlang/jaiph-runtime
-
-jobs:
-  publish:
-    name: Build and push jaiph-runtime
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Log in to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Image tags and jaiph ref
-        id: meta
-        run: |
-          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
-            VERSION="${GITHUB_REF_NAME#v}"
-            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${VERSION},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> "$GITHUB_OUTPUT"
-            echo "jaiph_ref=v${VERSION}" >> "$GITHUB_OUTPUT"
-          else
-            echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly" >> "$GITHUB_OUTPUT"
-            echo "jaiph_ref=nightly" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: runtime
-          file: runtime/Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          tags: ${{ steps.meta.outputs.tags }}
-          build-args: |
-            JAIPH_REPO_REF=${{ steps.meta.outputs.jaiph_ref }}
-
-      - name: Verify pushed image contains jaiph
-        run: |
-          TAG="$(echo '${{ steps.meta.outputs.tags }}' | cut -d',' -f1)"
-          docker run --rm --entrypoint sh "${TAG}" -lc "command -v jaiph && jaiph --version"
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 949ff263..238806d1 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -204,7 +204,7 @@ Jaiph publishes official runtime images to GHCR:
 | `ghcr.io/jaiphlang/jaiph-runtime:nightly` | `nightly` branch | Contributors and CI |
 | `ghcr.io/jaiphlang/jaiph-runtime:latest` | Latest release tag | Convenience alias |
 
-The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. Published tags (`:<semver>`, `:nightly`, `:latest`) are built from the `runtime/Dockerfile` in the jaiph repository (see `.github/workflows/docker-publish.yml`). The image includes Node.js, jaiph, `fuse-overlayfs`, agent CLIs where that Dockerfile installs them, and a non-root `jaiph` user (UID 10001).
+The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. Published tags (`:<semver>`, `:nightly`, `:latest`) are built from the `runtime/Dockerfile` in the jaiph repository (see the `docker-publish` job in `.github/workflows/ci.yml`). The image includes Node.js, jaiph, `fuse-overlayfs`, agent CLIs where that Dockerfile installs them, and a non-root `jaiph` user (UID 10001).
 
 ### Custom images and `jaiph run`
 

From 7f1a04fcddacacfd00ba1978c30d0bc7df42be7f Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 07:32:45 +0200
Subject: [PATCH 28/38] CI/runtime: enable Docker by default in CI; rename
 copy-mode banner to "tmp workspace"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two coupled changes that fix the chromium landing-page test (say-hello /
failure block was getting "(no sandbox)" because CI=true silently
disabled Docker, so the docs sample comparing against
"(Docker sandbox, fusefs)" diverged):

1. resolveDockerConfig: CI=true no longer disables Docker. The only
   environment-driven escape hatch is now JAIPH_UNSAFE=true. Rationale:
   landing-page e2e and docs sample tests must exercise the same
   sandbox path users do — silently dropping the sandbox in CI hides
   real regressions. Explicit overrides (JAIPH_DOCKER_ENABLED env or
   in-file runtime.docker_enabled) still take precedence.

2. formatJaiphRunningBannerLines: copy-mode label "tmp dir" →
   "tmp workspace" (clearer about what it actually is — a writable
   per-run clone of the workspace) and dropped the CI-only "…"
   obfuscation. The banner now always reflects the real sandbox mode
   so the docs/landing-page samples can compare against literal text.

Side effects handled:
- test/signal-lifecycle.test.ts: switched from CI="true" to
  JAIPH_UNSAFE="true" for the "exit-within-5s" assertion (it relied
  on Docker being disabled).
- src/runtime/docker.test.ts: rewrote the three CI-related cases to
  document the new contract (CI=true keeps Docker on; in-file and env
  overrides still win).
- src/cli/run/display.test.ts: dropped the obfuscation test,
  refreshed the copy-mode test for the new label, added a parity
  test that the banner is identical in CI and locally.
- docs/sandboxing.md, docs/configuration.md: updated the default
  rule table, configuration key descriptions, and precedence text.

e2e tests are unaffected: e2e/lib/common.sh already pins
JAIPH_DOCKER_ENABLED=false and JAIPH_UNSAFE=true for non-Docker tests,
and Docker-specific tests already set JAIPH_DOCKER_ENABLED=true
explicitly. No tooling changes needed there.

Made-with: Cursor
---
 docs/configuration.md         |  4 +--
 docs/sandboxing.md            | 14 +++++------
 src/cli/run/display.test.ts   | 46 +++++++++++------------------------
 src/cli/run/display.ts        | 12 +++------
 src/runtime/docker.test.ts    | 16 ++++++------
 src/runtime/docker.ts         | 16 ++++++------
 test/signal-lifecycle.test.ts |  2 +-
 7 files changed, 43 insertions(+), 67 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index e6ef897d..2ad56e05 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -171,7 +171,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r
 
 | Key | Type | Default | Env variable | Description |
 |-----|------|---------|--------------|-------------|
-| `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. |
+| `runtime.docker_enabled` | boolean | `true` by default (incl. CI); `false` only when `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. |
 | `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, uses the official GHCR image tag matching the installed jaiph version. For a custom image, build and push (or tag locally), then set this key or `JAIPH_DOCKER_IMAGE`. |
 | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. |
@@ -188,7 +188,7 @@ For **agent and run keys**, resolution order (highest wins):
 3. **Module-level `config`** — applies to workflows that don't define their own block.
 4. **Built-in defaults.**
 
-For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > CI/unsafe default rule**. The default rule enables Docker when neither `CI=true` nor `JAIPH_UNSAFE=true` is set (see [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker)). Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys.
+For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > unsafe default rule**. The default rule enables Docker unless `JAIPH_UNSAFE=true` is set; `CI=true` no longer disables Docker (see [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker)). Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys.
 
 ### Locked variables
 
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 238806d1..89e54298 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -53,7 +53,7 @@ In both modes, run artifacts are written to a separate rw mount at `/jaiph/run`
 
 ### Enabling Docker
 
-Docker sandboxing is **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments (`CI=true`) or when the user explicitly opts out with `JAIPH_UNSAFE=true`, the default flips to `false`.
+Docker sandboxing is **on by default** for both local development and CI. When `JAIPH_UNSAFE=true` is not set, `runtime.docker_enabled` defaults to `true`. CI environments (`CI=true`) deliberately exercise the same sandbox path users do — landing-page e2e tests and docs sample tests would otherwise skip the sandbox in CI and miss real regressions. The only environment-driven escape hatch is `JAIPH_UNSAFE=true`.
 
 To disable Docker for a local run without setting an environment variable, set `runtime.docker_enabled = false` in a module-level `config` block:
 
@@ -65,14 +65,14 @@ config {
 
 `runtime.*` keys belong only in module-level config. Placing them in a workflow-level `config` block is a parse error.
 
-The environment variable `JAIPH_DOCKER_ENABLED` overrides both the in-file setting and the CI/unsafe default when set: only the literal string `"true"` enables Docker; any other value disables it. `JAIPH_UNSAFE=true` is the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted.
+The environment variable `JAIPH_DOCKER_ENABLED` overrides both the in-file setting and the unsafe default when set: only the literal string `"true"` enables Docker; any other value disables it. `JAIPH_UNSAFE=true` is the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted.
 
 **Default rule (when no explicit `JAIPH_DOCKER_ENABLED` or in-file `runtime.docker_enabled` is set):**
 
 | Environment | Default |
 |-------------|---------|
-| Plain local (no `CI`, no `JAIPH_UNSAFE`) | Docker **on** |
-| `CI=true` | Docker **off** |
+| Plain local (no `JAIPH_UNSAFE`) | Docker **on** |
+| `CI=true` | Docker **on** (CI exercises the same sandbox path as users) |
 | `JAIPH_UNSAFE=true` | Docker **off** |
 
 Explicit overrides (`JAIPH_DOCKER_ENABLED` env or in-file `runtime.docker_enabled`) always take precedence over the default rule.
@@ -85,7 +85,7 @@ All Docker-related keys live under `runtime.*` in module-level config:
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | Enable Docker sandbox for the run. |
+| `runtime.docker_enabled` | boolean | `true` by default (incl. CI); `false` only when `JAIPH_UNSAFE=true` | Enable Docker sandbox for the run. |
 | `runtime.docker_image` | string | `"ghcr.io/jaiphlang/jaiph-runtime:<version>"` | Container image. Must already contain `jaiph`. Defaults to the official GHCR runtime image matching the installed jaiph version. |
 | `runtime.docker_network` | string | `"default"` | Docker network mode. |
 | `runtime.docker_timeout` | integer | `300` | Max execution time in seconds. `0` disables the timeout. |
@@ -95,9 +95,9 @@ Each key is type-checked at parse time. Unknown keys produce `E_PARSE`.
 
 #### Environment variable overrides
 
-Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Additionally, `CI` and `JAIPH_UNSAFE` affect the default for `runtime.docker_enabled` (see [Enabling Docker](#enabling-docker)). Workspace mounts are not overridable via environment.
+Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Additionally, `JAIPH_UNSAFE` affects the default for `runtime.docker_enabled` (see [Enabling Docker](#enabling-docker)). `CI=true` does **not** affect the default — CI runs use the same sandbox path users do. Workspace mounts are not overridable via environment.
 
-Precedence: `JAIPH_DOCKER_ENABLED` env > in-file config > CI/unsafe default rule.
+Precedence: `JAIPH_DOCKER_ENABLED` env > in-file config > unsafe default rule.
 
 If `JAIPH_DOCKER_TIMEOUT` is set but not a valid integer, the default (`300`) is used.
 
diff --git a/src/cli/run/display.test.ts b/src/cli/run/display.test.ts
index 6470260f..0226d8b6 100644
--- a/src/cli/run/display.test.ts
+++ b/src/cli/run/display.test.ts
@@ -16,36 +16,25 @@ test("formatJaiphRunningBannerLines: no Docker shows no sandbox (no color)", ()
   assert.equal(s, "\nJaiph: Running say_hello.jh (no sandbox)\n\n");
 });
 
-test("formatJaiphRunningBannerLines: Docker overlay shows fusefs locally (no color)", () => {
-  const prev = process.env.CI;
-  delete process.env.CI;
-  try {
-    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
-    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, fusefs)\n\n");
-  } finally {
-    if (prev === undefined) delete process.env.CI;
-    else process.env.CI = prev;
-  }
+test("formatJaiphRunningBannerLines: Docker overlay shows fusefs (no color)", () => {
+  const s = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
+  assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, fusefs)\n\n");
 });
 
-test("formatJaiphRunningBannerLines: Docker copy shows tmp dir locally (no color)", () => {
-  const prev = process.env.CI;
-  delete process.env.CI;
-  try {
-    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "copy", false);
-    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, tmp dir)\n\n");
-  } finally {
-    if (prev === undefined) delete process.env.CI;
-    else process.env.CI = prev;
-  }
+test("formatJaiphRunningBannerLines: Docker copy shows tmp workspace (no color)", () => {
+  const s = formatJaiphRunningBannerLines("say_hello.jh", true, "copy", false);
+  assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, tmp workspace)\n\n");
 });
 
-test("formatJaiphRunningBannerLines: CI obfuscates Docker sandbox detail", () => {
+test("formatJaiphRunningBannerLines: banner is the same in CI and locally (no obfuscation)", () => {
   const prev = process.env.CI;
   process.env.CI = "true";
   try {
-    const s = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
-    assert.equal(s, "\nJaiph: Running say_hello.jh (Docker sandbox, …)\n\n");
+    const sCi = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
+    delete process.env.CI;
+    const sLocal = formatJaiphRunningBannerLines("say_hello.jh", true, "overlay", false);
+    assert.equal(sCi, sLocal);
+    assert.equal(sCi, "\nJaiph: Running say_hello.jh (Docker sandbox, fusefs)\n\n");
   } finally {
     if (prev === undefined) delete process.env.CI;
     else process.env.CI = prev;
@@ -53,15 +42,8 @@ test("formatJaiphRunningBannerLines: CI obfuscates Docker sandbox detail", () =>
 });
 
 test("formatJaiphRunningBannerLines: dim ANSI wraps parenthetical when color on", () => {
-  const prev = process.env.CI;
-  delete process.env.CI;
-  try {
-    const s = formatJaiphRunningBannerLines("x.jh", false, null, true);
-    assert.ok(s.includes("\u001b[2m (no sandbox)\u001b[0m"));
-  } finally {
-    if (prev === undefined) delete process.env.CI;
-    else process.env.CI = prev;
-  }
+  const s = formatJaiphRunningBannerLines("x.jh", false, null, true);
+  assert.ok(s.includes("\u001b[2m (no sandbox)\u001b[0m"));
 });
 
 // === colorize ===
diff --git a/src/cli/run/display.ts b/src/cli/run/display.ts
index 62e40e56..84984baf 100644
--- a/src/cli/run/display.ts
+++ b/src/cli/run/display.ts
@@ -4,14 +4,10 @@ import type { SandboxMode } from "../../runtime/docker";
 const PROMPT_PREVIEW_MAX = 24;
 const PROMPT_ARGS_DISPLAY_MAX = 96;
 
-function isCiEnvironment(): boolean {
-  const c = process.env.CI;
-  return c === "true" || c === "1";
-}
-
 /**
  * First stdout lines for `jaiph run`: file name plus a dim parenthetical describing
- * Docker sandbox mode. In CI, the host-dependent detail (fuse vs copy) is redacted.
+ * Docker sandbox mode. The label always reflects the actual mode (no CI obfuscation)
+ * so docs/landing-page samples can compare against the literal banner text.
  */
 export function formatJaiphRunningBannerLines(
   fileBasename: string,
@@ -22,10 +18,8 @@ export function formatJaiphRunningBannerLines(
   let parenInner: string;
   if (!dockerEnabled) {
     parenInner = "no sandbox";
-  } else if (isCiEnvironment()) {
-    parenInner = "Docker sandbox, …";
   } else {
-    parenInner = sandboxMode === "overlay" ? "Docker sandbox, fusefs" : "Docker sandbox, tmp dir";
+    parenInner = sandboxMode === "overlay" ? "Docker sandbox, fusefs" : "Docker sandbox, tmp workspace";
   }
   const dimParen = colorize(` (${parenInner})`, "dim", colorEnabled);
   return `\nJaiph: Running ${fileBasename}${dimParen}\n\n`;
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 51dbe90d..b4324b98 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -173,19 +173,19 @@ test("resolveDockerConfig: env overrides in-file", () => {
   assert.equal(cfg.image, "debian:12");
 });
 
-test("resolveDockerConfig: CI=true disables Docker by default", () => {
+test("resolveDockerConfig: CI=true does NOT disable Docker (CI runs the real sandbox path)", () => {
   const cfg = resolveDockerConfig(undefined, { CI: "true" });
-  assert.equal(cfg.enabled, false);
+  assert.equal(cfg.enabled, true);
 });
 
-test("resolveDockerConfig: CI=true with in-file override enables Docker", () => {
-  const cfg = resolveDockerConfig({ dockerEnabled: true }, { CI: "true" });
-  assert.equal(cfg.enabled, true);
+test("resolveDockerConfig: CI=true with in-file dockerEnabled=false respects the in-file override", () => {
+  const cfg = resolveDockerConfig({ dockerEnabled: false }, { CI: "true" });
+  assert.equal(cfg.enabled, false);
 });
 
-test("resolveDockerConfig: env JAIPH_DOCKER_ENABLED=true overrides CI default", () => {
-  const cfg = resolveDockerConfig(undefined, { CI: "true", JAIPH_DOCKER_ENABLED: "true" });
-  assert.equal(cfg.enabled, true);
+test("resolveDockerConfig: env JAIPH_DOCKER_ENABLED=false disables even when CI=true and in-file enables", () => {
+  const cfg = resolveDockerConfig({ dockerEnabled: true }, { CI: "true", JAIPH_DOCKER_ENABLED: "false" });
+  assert.equal(cfg.enabled, false);
 });
 
 test("resolveDockerConfig: JAIPH_UNSAFE=true disables Docker by default", () => {
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 4451b348..23126d04 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -146,27 +146,27 @@ const DEFAULTS: DockerRunConfig = {
 
 /**
  * Resolve effective Docker config.
- * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > CI/unsafe default rule.
+ * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > unsafe default rule.
  *
  * Default rule (when no explicit override is set):
- *  - `CI=true` or `JAIPH_UNSAFE=true` → Docker off
- *  - Otherwise → Docker on
+ *  - `JAIPH_UNSAFE=true` → Docker off (explicit "run on host" escape hatch)
+ *  - Otherwise → Docker on (including in CI; CI=true alone no longer disables Docker)
  */
 export function resolveDockerConfig(
   inFile: RuntimeConfig | undefined,
   env: Record<string, string | undefined>,
 ): DockerRunConfig {
-  // enabled: env JAIPH_DOCKER_ENABLED > in-file > CI/unsafe default rule
+  // enabled: env JAIPH_DOCKER_ENABLED > in-file > unsafe default rule
   let enabled: boolean;
   if (env.JAIPH_DOCKER_ENABLED !== undefined) {
     enabled = env.JAIPH_DOCKER_ENABLED === "true";
   } else if (inFile?.dockerEnabled !== undefined) {
     enabled = inFile.dockerEnabled;
   } else {
-    // Default: Docker on unless CI or unsafe mode is active
-    const isCI = env.CI === "true";
-    const isUnsafe = env.JAIPH_UNSAFE === "true";
-    enabled = !(isCI || isUnsafe);
+    // Default: Docker on unless the user explicitly opts out via JAIPH_UNSAFE.
+    // CI=true is intentionally not consulted — CI runs (incl. landing-page e2e
+    // and docs sample tests) should exercise the same sandbox path users do.
+    enabled = env.JAIPH_UNSAFE !== "true";
   }
 
   // image: env > in-file > default
diff --git a/test/signal-lifecycle.test.ts b/test/signal-lifecycle.test.ts
index e433a112..272f9dd2 100644
--- a/test/signal-lifecycle.test.ts
+++ b/test/signal-lifecycle.test.ts
@@ -146,7 +146,7 @@ async function runInterruptTest(
   const child = spawn("node", [cliPath, "run", workflowPath], {
     stdio: "pipe",
     cwd: root,
-    env: { ...process.env, CI: "true" }, // disable Docker so exit-within-5s assertion is reliable
+    env: { ...process.env, JAIPH_UNSAFE: "true" }, // disable Docker so exit-within-5s assertion is reliable (CI=true no longer disables)
   });
 
   const exitPromise = new Promise<{ code: number | null; signal: string | null }>((resolve) => {

From 7d7c1a6f7e609fa61525f080812a823755058bc8 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 08:07:26 +0200
Subject: [PATCH 29/38] runtime(docker): drop privileges via setpriv after
 fuse-overlayfs mount

In overlay mode the container previously ran as the host UID, but
/jaiph/workspace is owned by the image user (jaiph UID 10001), so
fusermount3 refused to mount on a directory the calling user couldn't
write to. Apparmor was a red herring.

Fix: run the container as root (--user 0:0) so fuse-overlayfs can mount
/jaiph/workspace, then have overlay-run.sh chown /jaiph/run to the host
UID/GID and exec the workflow via setpriv. The workflow itself never
runs as root, host-readable artifacts are preserved, and copy mode is
unchanged (still --user host_uid:host_gid).

Custom images that lack setpriv print a one-line warning and run the
workflow as root inside the container.

Made-with: Cursor
---
 docs/sandboxing.md         | 11 ++++++--
 src/runtime/docker.test.ts | 55 ++++++++++++++++++++++++++++++++++++++
 src/runtime/docker.ts      | 51 +++++++++++++++++++++++++++++++----
 3 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 89e54298..52e34c77 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -26,7 +26,7 @@ Docker sandboxing is designed to contain damage from untrusted or semi-trusted w
 **What Docker protects against:**
 
 - **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The container's `/jaiph/workspace` is either an in-container fuse-overlayfs union over a read-only bind of the host workspace (overlay mode, writes land in a tmpfs upper layer and are discarded on exit) or a host-side clone of the workspace mounted read-write (copy mode, the clone is removed on exit). Only the run-artifacts directory (`/jaiph/run`) persists writes back to the host workspace.
-- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` for fuse-overlayfs; copy mode runs without it) and `--security-opt no-new-privileges` to prevent privilege escalation.
+- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` for fuse-overlayfs; copy mode runs without it) and `--security-opt no-new-privileges` to prevent privilege escalation. In overlay mode the container starts as root only long enough for the entrypoint script to mount fuse-overlayfs; the workflow itself is then exec'd via `setpriv` as the host UID/GID so neither the workflow nor anything it spawns runs as root.
 - **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary.
 - **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`.
 
@@ -150,7 +150,14 @@ The working directory is `/jaiph/workspace`. In overlay mode the host CLI genera
 
 ### Runtime behavior
 
-**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user <uid>:<gid>` maps the container user to the host user.
+**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract.
+
+**UID/GID handling on Linux:**
+
+- **Copy mode** -- the container runs directly as `--user <host_uid>:<host_gid>` so writes to the cloned workspace and `/jaiph/run` land owned by the host user.
+- **Overlay mode** -- the container starts as `--user 0:0` because `fuse-overlayfs` needs root to mount `/jaiph/workspace` (the image's `USER jaiph` cannot, since it doesn't own the mountpoint after the host UID is forwarded). The host CLI passes `JAIPH_HOST_UID` and `JAIPH_HOST_GID` env vars; `overlay-run.sh` mounts the overlay, `chown`s `/jaiph/run` to the host user, then `exec`s the workflow via `setpriv --reuid=$JAIPH_HOST_UID --regid=$JAIPH_HOST_GID --clear-groups -- jaiph run --raw …`. The workflow itself never runs as root. Custom images that lack `setpriv` print a one-line warning and run the workflow as root inside the container; install `util-linux` (which provides `setpriv`) to restore the privilege drop.
+
+On macOS Docker Desktop the VM transparently translates UIDs across the bind-mount boundary, so no `--user` override is applied.
 
 **stdin** -- The `docker run` process is spawned with stdin set to `ignore` to prevent the Docker CLI from blocking on stdin EOF.
 
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index b4324b98..8ac0ae02 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -440,6 +440,7 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     assert.ok(content.startsWith("#!/usr/bin/env bash"));
     assert.ok(content.includes("fuse-overlayfs -o"));
     assert.ok(content.includes("lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK"));
+    assert.ok(content.includes("allow_other"), "allow_other so non-root user can access overlay after privilege drop");
     assert.ok(content.includes('exec "$@"'));
     assert.ok(content.includes("E_DOCKER_OVERLAY"));
   } finally {
@@ -447,6 +448,21 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
   }
 });
 
+test("writeOverlayScript: drops privileges via setpriv when JAIPH_HOST_UID/GID provided", () => {
+  const scriptPath = writeOverlayScript();
+  try {
+    const content = readFileSync(scriptPath, "utf8");
+    assert.ok(content.includes("JAIPH_HOST_UID"), "honors JAIPH_HOST_UID");
+    assert.ok(content.includes("JAIPH_HOST_GID"), "honors JAIPH_HOST_GID");
+    assert.ok(content.includes("setpriv"), "uses setpriv to drop privileges");
+    assert.ok(content.includes("--reuid="), "passes --reuid to setpriv");
+    assert.ok(content.includes("--regid="), "passes --regid to setpriv");
+    assert.ok(content.includes("chown"), "chowns /jaiph/run so artifacts end up host-owned");
+  } finally {
+    rmSync(dirname(scriptPath), { recursive: true, force: true });
+  }
+});
+
 test("writeOverlayScript: contains no in-container rsync/cp fallback (host handles it now)", () => {
   const scriptPath = writeOverlayScript();
   try {
@@ -728,6 +744,45 @@ test("buildDockerArgs: throws when overlay mode is selected without script path"
   assert.throws(() => buildDockerArgs(defaultOpts({ sandboxMode: "overlay" })), /overlay mode requires/);
 });
 
+// ---------------------------------------------------------------------------
+// buildDockerArgs: UID/GID handling (Linux only)
+// ---------------------------------------------------------------------------
+
+test("buildDockerArgs: overlay mode runs container as root and forwards JAIPH_HOST_UID/GID (Linux)", () => {
+  if (process.platform !== "linux") return;
+  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+
+  const userIdx = args.indexOf("--user");
+  assert.ok(userIdx >= 0, "--user flag present");
+  assert.equal(args[userIdx + 1], "0:0", "container runs as root so fuse-overlayfs can mount /jaiph/workspace");
+
+  const envFlags = args
+    .map((v, i) => (v === "-e" ? args[i + 1] : null))
+    .filter((v): v is string => v !== null);
+  assert.ok(envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "JAIPH_HOST_UID forwarded to container");
+  assert.ok(envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "JAIPH_HOST_GID forwarded to container");
+});
+
+test("buildDockerArgs: copy mode runs as host UID:GID and does NOT forward JAIPH_HOST_UID/GID (Linux)", () => {
+  if (process.platform !== "linux") return;
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const args = buildDockerArgs(copyOpts(cloneDir));
+
+    const userIdx = args.indexOf("--user");
+    assert.ok(userIdx >= 0, "--user flag present in copy mode");
+    assert.notEqual(args[userIdx + 1], "0:0", "copy mode runs as host UID, not root");
+
+    const envFlags = args
+      .map((v, i) => (v === "-e" ? args[i + 1] : null))
+      .filter((v): v is string => v !== null);
+    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no privilege-drop env in copy mode");
+    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no privilege-drop env in copy mode");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
 test("buildDockerArgs: throws when copy mode is selected without sandboxWorkspaceDir", () => {
   assert.throws(
     () => buildDockerArgs(defaultOpts({ sandboxMode: "copy", sandboxWorkspaceDir: undefined })),
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 23126d04..2c88eac1 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -319,6 +319,7 @@ LOWER=/jaiph/workspace-ro
 UPPER=/tmp/overlay-upper
 WORK=/tmp/overlay-work
 MERGED=/jaiph/workspace
+RUN_DIR=/jaiph/run
 mkdir -p "$UPPER" "$WORK" "$MERGED"
 
 if ! command -v fuse-overlayfs >/dev/null 2>&1; then
@@ -329,13 +330,30 @@ if [ ! -e /dev/fuse ]; then
   printf 'E_DOCKER_OVERLAY /dev/fuse not present in container; pass --device /dev/fuse or set JAIPH_DOCKER_NO_OVERLAY=1 to use the copy sandbox path\\n' >&2
   exit 78
 fi
-if ! fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
+# allow_other so a non-root user can read/write through the overlay after the
+# privilege drop below. As root we don't need /etc/fuse.conf user_allow_other.
+if ! fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK,allow_other" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
   reason="$(tr '\\n' ' ' </tmp/jaiph-fuse-overlay.err | sed 's/[[:space:]]\\+/ /g; s/^ //; s/ $//')"
   printf 'E_DOCKER_OVERLAY fuse-overlayfs mount failed: %s\\n' "$reason" >&2
   exit 78
 fi
 
 cd "$MERGED"
+
+# Privilege drop: overlay setup needs root, but the workflow itself shouldn't
+# run as root. When the host passes JAIPH_HOST_UID/GID we chown /jaiph/run so
+# artifacts land owned by the host user (readable outside the container) and
+# exec via setpriv. If setpriv isn't installed (custom images) we fall back to
+# running as root with a single warning — security posture matches the legacy
+# overlay path.
+if [ -n "\${JAIPH_HOST_UID:-}" ] && [ -n "\${JAIPH_HOST_GID:-}" ]; then
+  chown -R "$JAIPH_HOST_UID:$JAIPH_HOST_GID" "$RUN_DIR" 2>/dev/null || true
+  if command -v setpriv >/dev/null 2>&1; then
+    exec setpriv --reuid="$JAIPH_HOST_UID" --regid="$JAIPH_HOST_GID" --clear-groups -- "$@"
+  fi
+  printf 'jaiph: setpriv not found in image; running workflow as root inside container\\n' >&2
+fi
+
 exec "$@"
 `;
 
@@ -615,13 +633,30 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
     }
   }
 
+  // UID/GID strategy:
+  //   copy mode  → run as host UID:GID directly so writes to the cloned
+  //                workspace and /jaiph/run land owned by the host user.
+  //   overlay mode → run as root (0:0) so fuse-overlayfs can mount and own
+  //                  /jaiph/workspace, then the entrypoint script drops
+  //                  privileges to JAIPH_HOST_UID/GID via setpriv before
+  //                  exec'ing the workflow. We set --user 0:0 explicitly to
+  //                  override the image's USER directive (e.g. the official
+  //                  image runs as `jaiph` UID 10001 by default).
+  // Linux-only: macOS Docker Desktop translates UIDs across the VM boundary,
+  // so we don't override --user there.
+  let hostUid: string | undefined;
+  let hostGid: string | undefined;
   if (process.platform === "linux") {
     try {
-      const uid = execSync("id -u", { encoding: "utf8" }).trim();
-      const gid = execSync("id -g", { encoding: "utf8" }).trim();
-      args.push("--user", `${uid}:${gid}`);
+      hostUid = execSync("id -u", { encoding: "utf8" }).trim();
+      hostGid = execSync("id -g", { encoding: "utf8" }).trim();
     } catch {
-      // Fall through without --user
+      // Fall through without --user / JAIPH_HOST_*
+    }
+    if (mode === "copy" && hostUid && hostGid) {
+      args.push("--user", `${hostUid}:${hostGid}`);
+    } else if (mode === "overlay") {
+      args.push("--user", "0:0");
     }
   }
 
@@ -676,6 +711,12 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
     }
   }
 
+  // Tell the in-container overlay script which UID/GID to drop to.
+  if (mode === "overlay" && hostUid && hostGid) {
+    args.push("-e", `JAIPH_HOST_UID=${hostUid}`);
+    args.push("-e", `JAIPH_HOST_GID=${hostGid}`);
+  }
+
   args.push("-w", CONTAINER_WORKSPACE);
   args.push(opts.config.image);
 

From 940a3bd50c96ab1dcae696ae36e802a67e3a8c93 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:01:54 +0200
Subject: [PATCH 30/38] runtime(docker): run as host UID throughout, drop root
 + setpriv dance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously overlay mode ran the container as root so fuse-overlayfs could
mount /jaiph/workspace, then dropped to the host UID via setpriv. That
needed SYS_ADMIN + SETUID + SETGID + CHOWN caps and an entrypoint script
that did chown + setpriv.

Make the runtime image's mountpoints (/jaiph/workspace, /jaiph/run)
mode 1777, so the container can run as the host UID from the start —
fuse-overlayfs has the write access it needs to mount, no root required.

Net change: caps shrink to just SYS_ADMIN, OVERLAY_SCRIPT shrinks back
to mount → cd → exec, no JAIPH_HOST_UID/GID env contract, ~40 fewer
lines in docker.ts. Custom images that lack a 1777 mountpoint (or a
USER that owns it) need to set JAIPH_DOCKER_NO_OVERLAY=1.

Made-with: Cursor
---
 docker/Dockerfile.runtime  | 11 ++++-
 docs/sandboxing.md         |  9 ++--
 src/runtime/docker.test.ts | 88 +++++++++++++++++++++++---------------
 src/runtime/docker.ts      | 63 ++++++++-------------------
 4 files changed, 85 insertions(+), 86 deletions(-)

diff --git a/docker/Dockerfile.runtime b/docker/Dockerfile.runtime
index c45972d3..86669f5f 100644
--- a/docker/Dockerfile.runtime
+++ b/docker/Dockerfile.runtime
@@ -25,9 +25,18 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 # Non-root user for sandbox safety.
+#
+# /jaiph/workspace and /jaiph/run are mountpoints that get covered at runtime
+# (overlay or bind mount). Their bare permissions only matter before the mount
+# happens. We set them to 1777 (sticky-bit world-writable) so the container can
+# run as the *host* UID — not the image's `jaiph` UID — without having to chown
+# anything at runtime. fusermount3/fuse-overlayfs need write access to the
+# mountpoint to mount the overlay, and a host UID that doesn't match `jaiph`
+# (10001) wouldn't have it on a 0755 dir.
 RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \
     mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \
-    chown -R jaiph:jaiph /jaiph
+    chown -R jaiph:jaiph /jaiph && \
+    chmod 1777 /jaiph/workspace /jaiph/run
 
 # Install jaiph from the local tarball (provided at build time via --build-arg).
 # The tarball is produced by `npm pack` in CI before the docker build step.
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 52e34c77..8b95baf2 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -26,7 +26,7 @@ Docker sandboxing is designed to contain damage from untrusted or semi-trusted w
 **What Docker protects against:**
 
 - **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The container's `/jaiph/workspace` is either an in-container fuse-overlayfs union over a read-only bind of the host workspace (overlay mode, writes land in a tmpfs upper layer and are discarded on exit) or a host-side clone of the workspace mounted read-write (copy mode, the clone is removed on exit). Only the run-artifacts directory (`/jaiph/run`) persists writes back to the host workspace.
-- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` for fuse-overlayfs; copy mode runs without it) and `--security-opt no-new-privileges` to prevent privilege escalation. In overlay mode the container starts as root only long enough for the entrypoint script to mount fuse-overlayfs; the workflow itself is then exec'd via `setpriv` as the host UID/GID so neither the workflow nor anything it spawns runs as root.
+- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` so fuse-overlayfs can mount the union filesystem; copy mode adds nothing) and `--security-opt no-new-privileges` to prevent privilege escalation. The container always runs as the host UID/GID — never as root — so writes to bind mounts are owned by the host user.
 - **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary.
 - **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`.
 
@@ -150,12 +150,11 @@ The working directory is `/jaiph/workspace`. In overlay mode the host CLI genera
 
 ### Runtime behavior
 
-**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract.
+**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs to mount the union filesystem directly without going through fusermount3's suid path) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract.
 
-**UID/GID handling on Linux:**
+**UID/GID handling on Linux** -- the container always runs as `--user <host_uid>:<host_gid>` (in both overlay and copy mode), so writes to bind mounts (`/jaiph/run` always; `/jaiph/workspace` in copy mode) end up owned by the host user. There is no in-container privilege drop — the workflow runs as the host UID from the moment the container starts. This works in overlay mode because the official runtime image creates `/jaiph/workspace` (the fuse-overlayfs mountpoint) with mode `1777`, giving any UID write access to the bare mountpoint — required by fuse-overlayfs, since otherwise a host UID that doesn't match the image's `jaiph` user (10001) would be refused. The 1777 mode only matters before the overlay is mounted; once mounted, the overlay's own permissions take over.
 
-- **Copy mode** -- the container runs directly as `--user <host_uid>:<host_gid>` so writes to the cloned workspace and `/jaiph/run` land owned by the host user.
-- **Overlay mode** -- the container starts as `--user 0:0` because `fuse-overlayfs` needs root to mount `/jaiph/workspace` (the image's `USER jaiph` cannot, since it doesn't own the mountpoint after the host UID is forwarded). The host CLI passes `JAIPH_HOST_UID` and `JAIPH_HOST_GID` env vars; `overlay-run.sh` mounts the overlay, `chown`s `/jaiph/run` to the host user, then `exec`s the workflow via `setpriv --reuid=$JAIPH_HOST_UID --regid=$JAIPH_HOST_GID --clear-groups -- jaiph run --raw …`. The workflow itself never runs as root. Custom images that lack `setpriv` print a one-line warning and run the workflow as root inside the container; install `util-linux` (which provides `setpriv`) to restore the privilege drop.
+Custom images used in overlay mode must either run as a UID that owns `/jaiph/workspace`, or create `/jaiph/workspace` with mode `1777`. If neither is true, fuse-overlayfs will fail to mount with `E_DOCKER_OVERLAY` and you should fall back to copy mode (`JAIPH_DOCKER_NO_OVERLAY=1`).
 
 On macOS Docker Desktop the VM transparently translates UIDs across the bind-mount boundary, so no `--user` override is applied.
 
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 8ac0ae02..3ff37fdd 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -440,7 +440,6 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
     assert.ok(content.startsWith("#!/usr/bin/env bash"));
     assert.ok(content.includes("fuse-overlayfs -o"));
     assert.ok(content.includes("lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK"));
-    assert.ok(content.includes("allow_other"), "allow_other so non-root user can access overlay after privilege drop");
     assert.ok(content.includes('exec "$@"'));
     assert.ok(content.includes("E_DOCKER_OVERLAY"));
   } finally {
@@ -448,16 +447,15 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
   }
 });
 
-test("writeOverlayScript: drops privileges via setpriv when JAIPH_HOST_UID/GID provided", () => {
+test("writeOverlayScript: contains no setpriv / chown / JAIPH_HOST_UID dance", () => {
+  // Mountpoint perms (1777) in the image let the container run as the host
+  // UID directly — no need for root + privilege drop inside the container.
   const scriptPath = writeOverlayScript();
   try {
     const content = readFileSync(scriptPath, "utf8");
-    assert.ok(content.includes("JAIPH_HOST_UID"), "honors JAIPH_HOST_UID");
-    assert.ok(content.includes("JAIPH_HOST_GID"), "honors JAIPH_HOST_GID");
-    assert.ok(content.includes("setpriv"), "uses setpriv to drop privileges");
-    assert.ok(content.includes("--reuid="), "passes --reuid to setpriv");
-    assert.ok(content.includes("--regid="), "passes --regid to setpriv");
-    assert.ok(content.includes("chown"), "chowns /jaiph/run so artifacts end up host-owned");
+    assert.ok(!content.includes("setpriv"), "no setpriv: container runs as host UID from start");
+    assert.ok(!content.includes("JAIPH_HOST_UID"), "no JAIPH_HOST_UID env contract");
+    assert.ok(!content.includes("chown"), "no in-container chown");
   } finally {
     rmSync(dirname(scriptPath), { recursive: true, force: true });
   }
@@ -639,14 +637,34 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   const capDropIdx = args.indexOf("--cap-drop");
   assert.ok(capDropIdx >= 0, "--cap-drop present");
   assert.equal(args[capDropIdx + 1], "ALL");
-  const capAddIdx = args.indexOf("--cap-add");
-  assert.ok(capAddIdx >= 0, "--cap-add present");
-  assert.equal(args[capAddIdx + 1], "SYS_ADMIN");
   const secOptIdx = args.indexOf("--security-opt");
   assert.ok(secOptIdx >= 0, "--security-opt present");
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
 
+test("buildDockerArgs: overlay mode adds only SYS_ADMIN (no SETUID/SETGID/CHOWN)", () => {
+  // Container runs as host UID throughout — no in-container privilege drop,
+  // no chown. The image's 1777 mountpoint lets fuse-overlayfs mount as host UID.
+  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+  const capAddValues = args
+    .map((v, i) => (v === "--cap-add" ? args[i + 1] : null))
+    .filter((v): v is string => v !== null);
+  assert.deepStrictEqual(capAddValues, ["SYS_ADMIN"], "only SYS_ADMIN, used by fuse-overlayfs to mount directly");
+});
+
+test("buildDockerArgs: copy mode adds no caps", () => {
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const args = buildDockerArgs(copyOpts(cloneDir));
+    const capAddValues = args
+      .map((v, i) => (v === "--cap-add" ? args[i + 1] : null))
+      .filter((v): v is string => v !== null);
+    assert.deepStrictEqual(capAddValues, [], "copy mode runs with no added capabilities");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
+});
+
 test("buildDockerArgs: overlay mode adds --security-opt apparmor=unconfined on Linux to allow fuse mounts", () => {
   if (process.platform !== "linux") return;
   const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
@@ -748,36 +766,38 @@ test("buildDockerArgs: throws when overlay mode is selected without script path"
 // buildDockerArgs: UID/GID handling (Linux only)
 // ---------------------------------------------------------------------------
 
-test("buildDockerArgs: overlay mode runs container as root and forwards JAIPH_HOST_UID/GID (Linux)", () => {
+test("buildDockerArgs: both modes run as host UID:GID on Linux (no privilege-drop dance)", () => {
   if (process.platform !== "linux") return;
-  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
 
-  const userIdx = args.indexOf("--user");
-  assert.ok(userIdx >= 0, "--user flag present");
-  assert.equal(args[userIdx + 1], "0:0", "container runs as root so fuse-overlayfs can mount /jaiph/workspace");
+  const overlayArgs = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+  const overlayUserIdx = overlayArgs.indexOf("--user");
+  assert.ok(overlayUserIdx >= 0, "overlay: --user flag present");
+  assert.notEqual(overlayArgs[overlayUserIdx + 1], "0:0", "overlay: container does NOT run as root");
+  assert.match(overlayArgs[overlayUserIdx + 1], /^\d+:\d+$/, "overlay: --user is host uid:gid");
 
-  const envFlags = args
-    .map((v, i) => (v === "-e" ? args[i + 1] : null))
-    .filter((v): v is string => v !== null);
-  assert.ok(envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "JAIPH_HOST_UID forwarded to container");
-  assert.ok(envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "JAIPH_HOST_GID forwarded to container");
+  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
+  try {
+    const copyArgs = buildDockerArgs(copyOpts(cloneDir));
+    const copyUserIdx = copyArgs.indexOf("--user");
+    assert.ok(copyUserIdx >= 0, "copy: --user flag present");
+    assert.equal(copyArgs[copyUserIdx + 1], overlayArgs[overlayUserIdx + 1], "copy and overlay use the same host UID:GID");
+  } finally {
+    rmSync(cloneDir, { recursive: true, force: true });
+  }
 });
 
-test("buildDockerArgs: copy mode runs as host UID:GID and does NOT forward JAIPH_HOST_UID/GID (Linux)", () => {
-  if (process.platform !== "linux") return;
+test("buildDockerArgs: no JAIPH_HOST_UID/GID env injected (no privilege-drop contract)", () => {
+  const overlayArgs = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
   const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
   try {
-    const args = buildDockerArgs(copyOpts(cloneDir));
-
-    const userIdx = args.indexOf("--user");
-    assert.ok(userIdx >= 0, "--user flag present in copy mode");
-    assert.notEqual(args[userIdx + 1], "0:0", "copy mode runs as host UID, not root");
-
-    const envFlags = args
-      .map((v, i) => (v === "-e" ? args[i + 1] : null))
-      .filter((v): v is string => v !== null);
-    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no privilege-drop env in copy mode");
-    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no privilege-drop env in copy mode");
+    const copyArgs = buildDockerArgs(copyOpts(cloneDir));
+    for (const args of [overlayArgs, copyArgs]) {
+      const envFlags = args
+        .map((v, i) => (v === "-e" ? args[i + 1] : null))
+        .filter((v): v is string => v !== null);
+      assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no JAIPH_HOST_UID env");
+      assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no JAIPH_HOST_GID env");
+    }
   } finally {
     rmSync(cloneDir, { recursive: true, force: true });
   }
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 2c88eac1..7143fb7b 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -319,7 +319,6 @@ LOWER=/jaiph/workspace-ro
 UPPER=/tmp/overlay-upper
 WORK=/tmp/overlay-work
 MERGED=/jaiph/workspace
-RUN_DIR=/jaiph/run
 mkdir -p "$UPPER" "$WORK" "$MERGED"
 
 if ! command -v fuse-overlayfs >/dev/null 2>&1; then
@@ -330,30 +329,13 @@ if [ ! -e /dev/fuse ]; then
   printf 'E_DOCKER_OVERLAY /dev/fuse not present in container; pass --device /dev/fuse or set JAIPH_DOCKER_NO_OVERLAY=1 to use the copy sandbox path\\n' >&2
   exit 78
 fi
-# allow_other so a non-root user can read/write through the overlay after the
-# privilege drop below. As root we don't need /etc/fuse.conf user_allow_other.
-if ! fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK,allow_other" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
+if ! fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then
   reason="$(tr '\\n' ' ' </tmp/jaiph-fuse-overlay.err | sed 's/[[:space:]]\\+/ /g; s/^ //; s/ $//')"
   printf 'E_DOCKER_OVERLAY fuse-overlayfs mount failed: %s\\n' "$reason" >&2
   exit 78
 fi
 
 cd "$MERGED"
-
-# Privilege drop: overlay setup needs root, but the workflow itself shouldn't
-# run as root. When the host passes JAIPH_HOST_UID/GID we chown /jaiph/run so
-# artifacts land owned by the host user (readable outside the container) and
-# exec via setpriv. If setpriv isn't installed (custom images) we fall back to
-# running as root with a single warning — security posture matches the legacy
-# overlay path.
-if [ -n "\${JAIPH_HOST_UID:-}" ] && [ -n "\${JAIPH_HOST_GID:-}" ]; then
-  chown -R "$JAIPH_HOST_UID:$JAIPH_HOST_GID" "$RUN_DIR" 2>/dev/null || true
-  if command -v setpriv >/dev/null 2>&1; then
-    exec setpriv --reuid="$JAIPH_HOST_UID" --regid="$JAIPH_HOST_GID" --clear-groups -- "$@"
-  fi
-  printf 'jaiph: setpriv not found in image; running workflow as root inside container\\n' >&2
-fi
-
 exec "$@"
 `;
 
@@ -616,6 +598,11 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
 
   args.push("--cap-drop", "ALL");
   if (mode === "overlay") {
+    // SYS_ADMIN is the only cap we add: fuse-overlayfs needs it to mount the
+    // union filesystem directly (without going through fusermount3's suid
+    // path, which would be neutered by --security-opt no-new-privileges
+    // anyway). The workflow doesn't need any of these — they're only used by
+    // the entrypoint script during the overlay setup.
     args.push("--cap-add", "SYS_ADMIN");
   }
   args.push("--security-opt", "no-new-privileges");
@@ -633,30 +620,20 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
     }
   }
 
-  // UID/GID strategy:
-  //   copy mode  → run as host UID:GID directly so writes to the cloned
-  //                workspace and /jaiph/run land owned by the host user.
-  //   overlay mode → run as root (0:0) so fuse-overlayfs can mount and own
-  //                  /jaiph/workspace, then the entrypoint script drops
-  //                  privileges to JAIPH_HOST_UID/GID via setpriv before
-  //                  exec'ing the workflow. We set --user 0:0 explicitly to
-  //                  override the image's USER directive (e.g. the official
-  //                  image runs as `jaiph` UID 10001 by default).
-  // Linux-only: macOS Docker Desktop translates UIDs across the VM boundary,
-  // so we don't override --user there.
-  let hostUid: string | undefined;
-  let hostGid: string | undefined;
+  // UID/GID strategy (Linux): always run the container as the host user so
+  // writes to bind mounts (cloned workspace in copy mode, /jaiph/run in both
+  // modes) end up owned by the host user. This works in overlay mode too
+  // because the official image's `/jaiph/workspace` mountpoint is mode 1777,
+  // giving any UID write access — required by fuse-overlayfs to mount there.
+  // macOS Docker Desktop translates UIDs across the VM boundary, so we don't
+  // override --user there.
   if (process.platform === "linux") {
     try {
-      hostUid = execSync("id -u", { encoding: "utf8" }).trim();
-      hostGid = execSync("id -g", { encoding: "utf8" }).trim();
+      const uid = execSync("id -u", { encoding: "utf8" }).trim();
+      const gid = execSync("id -g", { encoding: "utf8" }).trim();
+      args.push("--user", `${uid}:${gid}`);
     } catch {
-      // Fall through without --user / JAIPH_HOST_*
-    }
-    if (mode === "copy" && hostUid && hostGid) {
-      args.push("--user", `${hostUid}:${hostGid}`);
-    } else if (mode === "overlay") {
-      args.push("--user", "0:0");
+      // Fall through without --user.
     }
   }
 
@@ -711,12 +688,6 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
     }
   }
 
-  // Tell the in-container overlay script which UID/GID to drop to.
-  if (mode === "overlay" && hostUid && hostGid) {
-    args.push("-e", `JAIPH_HOST_UID=${hostUid}`);
-    args.push("-e", `JAIPH_HOST_GID=${hostGid}`);
-  }
-
   args.push("-w", CONTAINER_WORKSPACE);
   args.push(opts.config.image);
 

From e1ce395dafea3a542d4eb6d885ac4082dd0660d7 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:02:02 +0200
Subject: [PATCH 31/38] =?UTF-8?q?queue:=20regression=20=E2=80=94=20Docker?=
 =?UTF-8?q?=20vs=20no-sandbox=20parity=20for=20failed-step=20output?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Docker mode currently omits the log paths and "Output of failed step"
section that no-sandbox prints on failure. Add a queue entry capturing
the repro and acceptance criteria.

Made-with: Cursor
---
 QUEUE.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/QUEUE.md b/QUEUE.md
index fc79bdc6..efd5a003 100644
--- a/QUEUE.md
+++ b/QUEUE.md
@@ -13,6 +13,71 @@ Process rules:
 
 ***
 
+## Regression — Docker vs no-sandbox: same user-visible output for failed steps (stderr / log paths) #dev-ready
+
+**Goal**
+
+Running the same workflow with the Docker sandbox and with `JAIPH_UNSAFE=true` (no sandbox) must produce the **same** user-visible outcome: same failure messaging, same pointers to run log files on the **host**, and tests that lock this in so it cannot regress silently. Today Docker mode fails without the detailed log paths and “Output of failed step” line that no-sandbox prints (likely related to how stderr is captured or forwarded from the container).
+
+**Call samples (repro)**
+
+```text
+examples/say_hello.jh
+```
+
+Docker sandbox (tmp workspace) — failure is terse; no log paths or quoted step output:
+
+```text
+Jaiph: Running say_hello.jh (Docker sandbox, tmp workspace)
+
+workflow default
+  ▸ rule name_was_provided
+  ·   ▸ script validate_name
+  ·   ✗ script validate_name (0s)
+  ✗ rule name_was_provided (0s)
+
+
+✗ FAIL workflow default (5.5s)
+  Workflow execution failed.
+```
+
+No sandbox — same failing step, but full diagnostics:
+
+```text
+JAIPH_UNSAFE=true examples/say_hello.jh
+```
+
+```text
+Jaiph: Running say_hello.jh (no sandbox)
+
+workflow default
+  ▸ rule name_was_provided
+  ·   ▸ script validate_name
+  ·   ✗ script validate_name (0s)
+  ✗ rule name_was_provided (0s)
+
+
+✗ FAIL workflow default (0.5s)
+  Logs: /Users/jakubdzikowski/projects/jaiph/.jaiph/runs/2026-04-21/07-55-32-say_hello.jh
+  Summary: /Users/jakubdzikowski/projects/jaiph/.jaiph/runs/2026-04-21/07-55-32-say_hello.jh/run_summary.jsonl
+    out: /Users/jakubdzikowski/projects/jaiph/.jaiph/runs/2026-04-21/07-55-32-say_hello.jh/000003-script__validate_name.out
+    err: /Users/jakubdzikowski/projects/jaiph/.jaiph/runs/2026-04-21/07-55-32-say_hello.jh/000003-script__validate_name.err
+
+  Output of failed step:
+    You didn't provide your name :(
+```
+
+**Direction (not prescriptive)**
+
+The Docker runtime should surface paths that **point at the proper files on the host** (bind-mounted run dir). If the container sees different paths internally, a **string replace on emitted output** in the runtime layer is likely enough so stderr/UI matches no-sandbox. Investigate stderr forwarding from Docker alongside path rewriting.
+
+**Acceptance criteria**
+
+* Automated test(s) run both modes (or equivalent contract tests on emitted stderr) and assert parity for this scenario: user sees the same failure footer (log paths + “Output of failed step” content) for Docker and no-sandbox.
+* No “fix” required in this queue item beyond specifying behavior and tests; implementation is the task.
+
+***
+
 ## Cleanup — consolidate the 5-way test directory split #dev-ready
 
 **Goal**

From f53fdea4f3d96337c2b40f2091b008a7d47193fc Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:02:02 +0200
Subject: [PATCH 32/38] docs(landing): add "What is Jaiph?" intro section

Made-with: Cursor
---
 docs/index.html | 70 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 21 deletions(-)

diff --git a/docs/index.html b/docs/index.html
index 73d3f6de..8731e685 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -79,8 +79,11 @@ <h2>Try it out!</h2>
                     <pre><code class="language-bash static-highlight"><span class="code-line">curl -fsSL https://jaiph.org/run | bash -s '</span><span class="code-line"><span class="ralph-keyword">workflow</span> <span class="ralph-definition">default</span>() {</span><span class="code-line">  <span class="ralph-keyword">const</span> <span class="ralph-variable">response</span> <span class="ralph-operator">=</span> <span class="ralph-keyword">prompt</span> <span class="ralph-string">"Say: Hello, I am [model name]!"</span></span><span class="code-line">  <span class="ralph-keyword">log</span> response</span><span class="code-line">}'</span></code></pre>
                     <p class="small">Installs Jaiph <strong>v0.9.2</strong> to <strong>~/.local/bin</strong> (if not
                         already
-                        installed), and runs the sample workflow with <a href="https://cursor.com/docs/cli/installation" target="_blank" rel="noopener noreferrer">Cursor CLI</a> agent backend (the default one). See <a href="#samples">more samples</a>!</p>
-                    <p class="small warning">Jaiph is under heavy development. Core features and workflow syntax are stable since v0.8.0, but you may expect breaking changes before v1.0.0.</p>
+                        installed), and runs the sample workflow with <a href="https://cursor.com/docs/cli/installation"
+                            target="_blank" rel="noopener noreferrer">Cursor CLI</a> agent backend (the default one).
+                        See <a href="#samples">more samples</a>!</p>
+                    <p class="small warning">Jaiph is under heavy development. Core features and workflow syntax are
+                        stable since v0.8.0, but you may expect breaking changes before v1.0.0.</p>
                 </div>
 
                 <div class="code-tab-panel" data-panel="try-init-project">
@@ -102,6 +105,17 @@ <h2>Try it out!</h2>
             </div>
         </section>
 
+        <section>
+            <h2>What is Jaiph?</h2>
+            <div class="card doc-content">
+                <p><strong>Jaiph</strong> is a language and runtime for defining and orchestrating AI agent workflows.
+                </p>
+                <p>It allows you to combine agentic workflows with strict checks and script calls. It comes with
+                    build-in Docker sanboxing for agentic workflows, and a set of tooling to make your development
+                    faster and more efficient.</p>
+            </div>
+        </section>
+
         <section>
             <h2>Why Jaiph?</h2>
             <div class="card doc-content">
@@ -114,10 +128,11 @@ <h3>Language</h3>
                         <p>Embed scripts in your favorite language</p>
                     </div>
                     <div class="comparison-col">
-                        <h3>Runtime</h3>
+                        <h3>Tooling</h3>
                         <p>Built-in Docker sandboxing</p>
                         <p>Built-in testing framework</p>
                         <p>Tracks and saves all agent responses</p>
+                        <p>Default formatter and VSCode plugin</p>
                     </div>
                     <div class="comparison-col">
                         <h3>Open Source</h3>
@@ -137,8 +152,7 @@ <h2 id="samples">Samples</h2>
                         data-target="say-hello-jh">say_hello.jh</button>
                     <button type="button" class="code-tab-button"
                         data-target="say-hello-test-jh">say_hello.test.jh</button>
-                    <button type="button" class="code-tab-button"
-                        data-target="recover-loop-jh">recover_loop.jh</button>
+                    <button type="button" class="code-tab-button" data-target="recover-loop-jh">recover_loop.jh</button>
                     <button type="button" class="code-tab-button"
                         data-target="inbox-pipeline-jh">agent_inbox.jh</button>
                     <button type="button" class="code-tab-button" data-target="async-jh">async.jh</button>
@@ -252,7 +266,8 @@ <h2 id="samples">Samples</h2>
   # When
   run hello.default("Alice")
 }</code></pre>
-                    <p>Example failing test run output (expected string omits the trailing <code>:(</code> from stderr):</p>
+                    <p>Example failing test run output (expected string omits the trailing <code>:(</code> from stderr):
+                    </p>
                     <pre><code class="jaiph-run" data-sample-output="failing-run"><span class="run-command">➜  ./say_hello.test.jh</span>
 testing say_hello.test.jh
   <span class="run-marker">▸</span> without name, workflow fails with validation message
@@ -317,7 +332,8 @@ <h2 id="samples">Samples</h2>
 
 <span class="run-pass">✓ PASS</span> <span class="run-keyword">workflow</span> default <span class="run-time">(0.5s)</span></code></pre>
                     <p>For one-shot failure handling without retry, use <code>catch</code> instead. See
-                        <a href="/language#recover--repair-and-retry-loop">Language — recover</a>.</p>
+                        <a href="/language#recover--repair-and-retry-loop">Language — recover</a>.
+                    </p>
                 </div>
 
                 <div class="code-tab-panel" data-panel="inbox-pipeline-jh" data-sample="agent-inbox"
@@ -468,10 +484,12 @@ <h3>Language</h3>
                 </p>
 
                 <h3>Runtime</h3>
-                <p><strong>Docker sandboxing.</strong> Workflows run inside Docker by default for local development, providing
-                    filesystem and process isolation for agent and shell actions. Disable with <code>JAIPH_UNSAFE=true</code>
-                    or <code>runtime.docker_enabled = false</code>. See <a
-                        href="sandboxing">Sandboxing</a>.</p>
+                <p><strong>Docker sandboxing.</strong> Workflows run inside Docker by default for local development,
+                    providing
+                    filesystem and process isolation for agent and shell actions. Disable with
+                    <code>JAIPH_UNSAFE=true</code>
+                    or <code>runtime.docker_enabled = false</code>. See <a href="sandboxing">Sandboxing</a>.
+                </p>
                 <p><strong>Hooks.</strong> Attach shell automation to workflow and step lifecycle events via
                     <code>~/.jaiph/hooks.json</code> or <code>&lt;project&gt;/.jaiph/hooks.json</code>. See <a
                         href="hooks">Hooks</a>.
@@ -484,7 +502,8 @@ <h3>Runtime</h3>
                     location with the built-in <code>jaiphlang/artifacts</code> library. <code>artifacts.save</code>
                     copies a file, <code>artifacts.save_patch</code> exports a git diff, and
                     <code>artifacts.apply_patch</code> replays it. Works identically in Docker and on the host.
-                    See <a href="libraries#jaiphlangartifacts--publishing-files-out-of-the-sandbox">Libraries</a>.</p>
+                    See <a href="libraries#jaiphlangartifacts--publishing-files-out-of-the-sandbox">Libraries</a>.
+                </p>
                 <p><strong>Configuration.</strong> Control behavior with <code>config { ... }</code> blocks
                     at the module level or inside individual workflows for per-workflow overrides, plus environment
                     variables (env wins precedence). See <a href="configuration">Configuration</a> and
@@ -515,9 +534,11 @@ <h2>Syntax</h2>
                 <h3>Jaiph workflows</h3>
                 <dl class="primitive-list">
                     <dt><code>config { ... }</code></dt>
-                    <dd>Optional runtime options (agent backend/flags, logs, runtime, module metadata). Allowed at the top level
+                    <dd>Optional runtime options (agent backend/flags, logs, runtime, module metadata). Allowed at the
+                        top level
                         (module-wide) and inside individual workflows (per-workflow overrides for <code>agent.*</code>
-                        and <code>run.*</code> keys only; <code>runtime.*</code> and <code>module.*</code> are module-level only). Environment variables override config values. See <a
+                        and <code>run.*</code> keys only; <code>runtime.*</code> and <code>module.*</code> are
+                        module-level only). Environment variables override config values. See <a
                             href="configuration">Configuration</a>.</dd>
 
                     <dt><code>import "file.jh" as alias</code> &middot; <code>const name = value</code> /
@@ -527,10 +548,12 @@ <h3>Jaiph workflows</h3>
                         shared by
                         rules, scripts, and workflows in the same file. Values can be single-line
                         <code>"..."</code> strings, triple-quoted <code>"""..."""</code> multiline strings,
-                        or bare tokens.</dd>
+                        or bare tokens.
+                    </dd>
 
                     <dt><code>rule name() { ... }</code> &middot; <code>rule name(params) { ... }</code> &middot;
-                        <code>workflow name() { ... }</code> &middot; <code>workflow name(params) { ... }</code> &middot;
+                        <code>workflow name() { ... }</code> &middot; <code>workflow name(params) { ... }</code>
+                        &middot;
                         <code>script name = `cmd`</code> &middot; <code>script name = ```[lang] ... ```</code>
                     </dt>
                     <dd><code>rule</code> is for reusable checks (Jaiph structured steps; used with
@@ -538,14 +561,17 @@ <h3>Jaiph workflows</h3>
                         <code>workflow</code> orchestrates Jaiph steps only, and <code>script</code> holds bash (or any
                         language via a fence lang tag like <code>```node</code>, <code>```python3</code>, or a custom
                         shebang) invoked with <code>run</code>. Rules and workflows <strong>require parentheses</strong>
-                        on every definition &mdash; even when parameterless (e.g. <code>workflow default() { &hellip; }</code>).
+                        on every definition &mdash; even when parameterless (e.g.
+                        <code>workflow default() { &hellip; }</code>).
                         Named parameters go inside the parentheses; the compiler validates
-                        call-site arity when the callee declares params. Any fence tag is valid &mdash; it maps directly to
+                        call-site arity when the callee declares params. Any fence tag is valid &mdash; it maps directly
+                        to
                         <code>#!/usr/bin/env &lt;tag&gt;</code>. Scripts run in <strong>full isolation</strong>
                         &mdash; only positional arguments
                         and essential Jaiph variables (<code>JAIPH_SCRIPTS</code>,
                         <code>JAIPH_WORKSPACE</code>) are inherited; module-scoped variables are not visible.
-                        Reuse shell helpers with <code>import script</code> or small named <code>script</code> blocks in the same module. Scripts are emitted as
+                        Reuse shell helpers with <code>import script</code> or small named <code>script</code> blocks in
+                        the same module. Scripts are emitted as
                         separate executable files under <code>scripts/</code> (within the run build output tree; see <a
                             href="cli">CLI reference</a>).
                     </dd>
@@ -563,7 +589,8 @@ <h3>Jaiph workflows</h3>
                         <code>run greet(name)</code> is equivalent to <code>run greet("${name}")</code>.
                         <code>run `body`(args)</code> embeds a one-off shell command directly
                         without a named <code>script</code> definition &mdash; supports arguments and capture.
-                        Use triple backticks for multiline: <code>run ```...```(args)</code>.</dd>
+                        Use triple backticks for multiline: <code>run ```...```(args)</code>.
+                    </dd>
 
                     <dt><code>prompt "..."</code> &middot; <code>prompt myVar</code> &middot;
                         <code>prompt """ ... """</code> &middot;
@@ -606,7 +633,8 @@ <h3>Jaiph workflows</h3>
                     </dd>
 
                     <dt><code>fail "reason"</code> &middot; <code>fail """..."""</code></dt>
-                    <dd><code>fail</code> aborts with stderr + non-zero exit. Use triple quotes for multiline messages.</dd>
+                    <dd><code>fail</code> aborts with stderr + non-zero exit. Use triple quotes for multiline messages.
+                    </dd>
 
                     <dt><code>ensure ref() catch (err) { … }</code> &middot;
                         <code>run ref() catch (err) { … }</code>

From d1745608d6ee9ef8fd6a504766d12538409c56f6 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:28:05 +0200
Subject: [PATCH 33/38] ci: Ubuntu E2E matrix for Docker vs host, inherit
 JAIPH_UNSAFE in test_all

- Matrix: ubuntu (docker, unset JAIPH_UNSAFE), ubuntu (host), macos (host)
- WSL E2E exports JAIPH_UNSAFE=true to keep host-only runs
- test_all.sh no longer defaults JAIPH_UNSAFE=true; document fast local override
- contributing: document matrix and npm test:e2e behavior; fix unreleased summary

Made-with: Cursor
---
 .github/workflows/ci.yml | 19 +++++++++++++++++--
 CHANGELOG.md             |  6 ++++++
 docs/contributing.md     |  6 +++---
 e2e/test_all.sh          |  3 ++-
 4 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1637bb51..a119bec1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,12 +33,26 @@ jobs:
           git ls-remote --exit-code https://github.com/jaiphlang/jaiph.git "refs/tags/v${VERSION}"
 
   e2e:
-    name: E2E install and CLI workflow (${{ matrix.os }})
+    name: E2E (${{ matrix.os }}, ${{ matrix.label }})
     runs-on: ${{ matrix.os }}
+    env:
+      # Host/safe split applies on Ubuntu only. macOS runners do not ship Docker the same way — keep host-only there.
+      # "docker": unset JAIPH_UNSAFE so resolveDockerConfig enables the sandbox (pulls ghcr.io/jaiphlang/jaiph-runtime).
+      # "host": explicit opt-out, same as a fast local `JAIPH_UNSAFE=true npm run test:e2e`.
+      JAIPH_UNSAFE: ${{ matrix.jaiph_unsafe }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        include:
+          - os: ubuntu-latest
+            label: docker
+            jaiph_unsafe: ""
+          - os: ubuntu-latest
+            label: host
+            jaiph_unsafe: "true"
+          - os: macos-latest
+            label: host
+            jaiph_unsafe: "true"
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -170,6 +184,7 @@ jobs:
           $distro = "${{ steps.detect_wsl.outputs.distro }}"
           wsl -d "$distro" -- bash -lc "set -euo pipefail
           export DEBIAN_FRONTEND=noninteractive
+          export JAIPH_UNSAFE=true
           sudo apt-get update
           sudo apt-get install -y curl ca-certificates
           if ! command -v node >/dev/null 2>&1; then
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 34078a3e..0360dbeb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Unreleased
 
+## Summary
+
+- **Language / runtime:** Adds `Handle<T>` for `run async`, repair-and-retry `recover` loops on `run` steps, explicit nested managed calls in arguments, and optional `module.*` fields in `config`.
+- **Docker / artifacts:** Read-only workspace with a COW sandbox layer and persistence through the run directory. Docker is on by default whenever `JAIPH_UNSAFE` is not set to `true` (including CI). Tighter capabilities plus mount and env denylists. **Breaking:** images must already include `jaiph` (default `ghcr.io/jaiphlang/jaiph-runtime`), with official builds published to GHCR. New `artifacts.jh` library and `JAIPH_ARTIFACTS_DIR` for pulling files and patches out of the sandbox.
+- **CLI / polish:** Clearer quoting in the progress tree and prompts, removal of dead isolation-export paths and stray repo debug junk, and a PTY-based E2E test for the async progress tree.
+
 ## All changes
 
 - **Fix — CLI/Runtime:** Clean up double-quote rendering in step titles and log output — Parameter values displayed in the progress tree (e.g. `message="Found 3 issues in auth module"`) no longer produce backslash-escaped `\"` sequences or ambiguous nested `""` pairs. The fix touches three layers: `formatNamedParamsForDisplay` and `formatParamsForDisplay` in `src/cli/commands/format-params.ts` no longer escape inner double quotes with backslash (the surrounding `key="value"` delimiters are structural, not shell-safe); `formatStartLine` in `src/cli/run/display.ts` applies the same change for prompt previews; and `node-workflow-runtime.ts` strips outer quotes from interpolated channel-send payloads via `stripOuterQuotes` so messages like `"Found 3 issues"` are stored as `Found 3 issues` rather than carrying literal quote wrappers through dispatch. Regression tests added: `format-params-display.test.ts` asserts no `\"` in formatted output for payloads containing inner quotes; `display.test.ts` verifies prompt previews pass through quotes without escaping. E2E golden output for `agent_inbox.jh` updated to match.
diff --git a/docs/contributing.md b/docs/contributing.md
index f0fd6a43..f01ecfbd 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -54,7 +54,7 @@ For day-to-day work on the compiler and CLI you usually stay inside the clone: i
 | `npm run test:acceptance:compiler` | **`npm run build`**, then **`node --test`** on only `dist/src/**/*.acceptance.test.js` — compiler acceptance tests without the full unit suite or E2E. |
 | `npm run test:acceptance:runtime` | **`bash ./e2e/test_all.sh`** only — same E2E driver as below **without** an implicit rebuild; ensure `dist/` is up to date before running. |
 | `npm run test:acceptance` | **`npm run test:acceptance:compiler`** then **`npm run test:acceptance:runtime`**. |
-| `npm run test:e2e` | **`npm run build`**, then **`bash ./e2e/test_all.sh`**. Prefer this when you want a fresh `dist/` before E2E. |
+| `npm run test:e2e` | **`npm run build`**, then **`bash ./e2e/test_all.sh`**. Prefer this when you want a fresh `dist/` before E2E. By default this exercises the **Docker** sandbox when `JAIPH_UNSAFE` is unset. For a faster host-only run (no container), use **`JAIPH_UNSAFE=true npm run test:e2e`**. |
 | `npm run test:samples` | **`npx playwright test`** — Playwright suite for the docs landing page (`tests/e2e-samples/`). Uses `http://127.0.0.1:4000` (see `playwright.config.ts`); starts Jekyll via `webServer` or reuses one already on that port. Requires Playwright (`npx playwright install chromium` once). |
 | `npm run test:ci` | `npm test` followed by `npm run test:e2e` — useful before pushing when you want the full local picture. |
 
@@ -188,9 +188,9 @@ The project uses GitHub Actions (`.github/workflows/ci.yml`). Every push trigger
 | Job | Runner | Purpose |
 |-----|--------|---------|
 | **Compiler and unit tests** | `ubuntu-latest` | `npm test` (TypeScript unit + acceptance + golden tests), plus a `curl` check that the public install URL responds and a git-tag verification on `main`. |
-| **E2E install and CLI workflow** | `ubuntu-latest`, `macos-latest` (matrix) | `npm run test:e2e` — full build-and-run E2E suite on each OS. |
+| **E2E install and CLI workflow** | Matrix: **`ubuntu-latest` twice** + **`macos-latest`** | `npm run test:e2e` — full build-and-run E2E suite. **Ubuntu — docker:** `JAIPH_UNSAFE` unset (default Docker sandbox, pulls `ghcr.io/jaiphlang/jaiph-runtime`). **Ubuntu — host:** `JAIPH_UNSAFE=true` (host execution, no Docker). **macOS — host:** `JAIPH_UNSAFE=true` (macOS runners are not used for the Docker path). |
 | **Getting started (local)** | `ubuntu-latest` | Builds and serves the Jekyll documentation site locally (`bundle exec jekyll serve` on `127.0.0.1:4000`), waits for it to respond, smoke-checks key pages with `curl`, then runs the **Playwright landing-page sample verification** (`npx playwright test`). The Playwright step builds Jaiph, extracts sample source and expected output from the served HTML, verifies source parity with `examples/*.jh`, and runs deterministic samples through the CLI. No dependency on `jaiph.org`. |
-| **E2E install and CLI workflow (windows-latest + wsl)** | `windows-latest` | Detects an available WSL distro, installs Node inside it, and runs `npm run test:e2e` under WSL. Skipped when no distro is present on the runner image. |
+| **E2E install and CLI workflow (windows-latest + wsl)** | `windows-latest` | Detects an available WSL distro, installs Node inside it, and runs `npm run test:e2e` under WSL with **`JAIPH_UNSAFE=true`** (host-only, matching the previous default). Skipped when no distro is present on the runner image. |
 
 ### npm publish on tag (trusted publishing)
 
diff --git a/e2e/test_all.sh b/e2e/test_all.sh
index 847791f2..105245a2 100755
--- a/e2e/test_all.sh
+++ b/e2e/test_all.sh
@@ -98,8 +98,9 @@ for script in "${TEST_SCRIPTS[@]}"; do
   mkdir -p "${test_dir}"
 
   e2e::section "Running ${script_name}"
+  # JAIPH_UNSAFE is not defaulted here: unset → Docker on (see resolveDockerConfig).
+  # CI sets per-job env (ubuntu docker vs host). For fast local runs: JAIPH_UNSAFE=true npm run test:e2e
   if JAIPH_E2E_SKIP_INSTALL=1 \
-    JAIPH_UNSAFE="${JAIPH_UNSAFE:-true}" \
     JAIPH_E2E_TMP_DIR="${JAIPH_E2E_TMP_DIR:-}" \
     JAIPH_E2E_BIN_DIR="${JAIPH_E2E_BIN_DIR}" \
     JAIPH_E2E_WORK_DIR="${JAIPH_E2E_WORK_DIR}" \

From b03e127e20a5e5e7341c39bdc85f4d784e5ee651 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:28:34 +0200
Subject: [PATCH 34/38] runtime(docker): simplify Linux overlay path to root
 mount

Use a single Linux overlay strategy with --user 0:0 and only SYS_ADMIN, remove the setpriv/host UID handoff, and make the runtime image keep /jaiph/workspace root-owned so fuse-overlayfs mountpoint checks pass reliably in CI.

Made-with: Cursor
---
 docker/Dockerfile.runtime  | 10 +-----
 docs/sandboxing.md         | 11 ++++---
 src/runtime/docker.test.ts | 65 ++++++++++++++++++--------------------
 src/runtime/docker.ts      | 31 +++++++++---------
 4 files changed, 52 insertions(+), 65 deletions(-)

diff --git a/docker/Dockerfile.runtime b/docker/Dockerfile.runtime
index 86669f5f..433c641f 100644
--- a/docker/Dockerfile.runtime
+++ b/docker/Dockerfile.runtime
@@ -25,18 +25,10 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 # Non-root user for sandbox safety.
-#
-# /jaiph/workspace and /jaiph/run are mountpoints that get covered at runtime
-# (overlay or bind mount). Their bare permissions only matter before the mount
-# happens. We set them to 1777 (sticky-bit world-writable) so the container can
-# run as the *host* UID — not the image's `jaiph` UID — without having to chown
-# anything at runtime. fusermount3/fuse-overlayfs need write access to the
-# mountpoint to mount the overlay, and a host UID that doesn't match `jaiph`
-# (10001) wouldn't have it on a 0755 dir.
 RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \
     mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \
     chown -R jaiph:jaiph /jaiph && \
-    chmod 1777 /jaiph/workspace /jaiph/run
+    chown root:root /jaiph/workspace
 
 # Install jaiph from the local tarball (provided at build time via --build-arg).
 # The tarball is produced by `npm pack` in CI before the docker build step.
diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index 8b95baf2..ee45cc29 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -26,7 +26,7 @@ Docker sandboxing is designed to contain damage from untrusted or semi-trusted w
 **What Docker protects against:**
 
 - **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The container's `/jaiph/workspace` is either an in-container fuse-overlayfs union over a read-only bind of the host workspace (overlay mode, writes land in a tmpfs upper layer and are discarded on exit) or a host-side clone of the workspace mounted read-write (copy mode, the clone is removed on exit). Only the run-artifacts directory (`/jaiph/run`) persists writes back to the host workspace.
-- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` so fuse-overlayfs can mount the union filesystem; copy mode adds nothing) and `--security-opt no-new-privileges` to prevent privilege escalation. The container always runs as the host UID/GID — never as root — so writes to bind mounts are owned by the host user.
+- **Process isolation** -- Container processes cannot see or signal host processes. The container runs with `--cap-drop ALL` (overlay mode re-adds `SYS_ADMIN` for fuse-overlayfs; copy mode adds nothing) and `--security-opt no-new-privileges` to prevent privilege escalation. In Linux overlay mode the workflow runs as root inside the container so fuse-overlayfs can mount reliably; copy mode and macOS remain non-root as before.
 - **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary.
 - **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`.
 
@@ -150,11 +150,12 @@ The working directory is `/jaiph/workspace`. In overlay mode the host CLI genera
 
 ### Runtime behavior
 
-**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds `SYS_ADMIN` (required for fuse-overlayfs to mount the union filesystem directly without going through fusermount3's suid path) and exposes `/dev/fuse` via `--device`, while copy mode runs without either. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract.
+**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL` drops all Linux capabilities; overlay mode re-adds only `SYS_ADMIN` (fuse-overlayfs mount). Copy mode adds nothing. `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract.
 
-**UID/GID handling on Linux** -- the container always runs as `--user <host_uid>:<host_gid>` (in both overlay and copy mode), so writes to bind mounts (`/jaiph/run` always; `/jaiph/workspace` in copy mode) end up owned by the host user. There is no in-container privilege drop — the workflow runs as the host UID from the moment the container starts. This works in overlay mode because the official runtime image creates `/jaiph/workspace` (the fuse-overlayfs mountpoint) with mode `1777`, giving any UID write access to the bare mountpoint — required by fuse-overlayfs, since otherwise a host UID that doesn't match the image's `jaiph` user (10001) would be refused. The 1777 mode only matters before the overlay is mounted; once mounted, the overlay's own permissions take over.
+**UID/GID handling on Linux:**
 
-Custom images used in overlay mode must either run as a UID that owns `/jaiph/workspace`, or create `/jaiph/workspace` with mode `1777`. If neither is true, fuse-overlayfs will fail to mount with `E_DOCKER_OVERLAY` and you should fall back to copy mode (`JAIPH_DOCKER_NO_OVERLAY=1`).
+- **Copy mode** -- the container runs directly as `--user <host_uid>:<host_gid>` so writes to the cloned workspace and `/jaiph/run` land owned by the host user.
+- **Overlay mode** -- the container runs as `--user 0:0` and executes the workflow as root inside the container. This keeps the overlay path simple and robust on Linux runners where `fusermount3` enforces strict mountpoint checks.
 
 On macOS Docker Desktop the VM transparently translates UIDs across the bind-mount boundary, so no `--user` override is applied.
 
@@ -162,7 +163,7 @@ On macOS Docker Desktop the VM transparently translates UIDs across the bind-mou
 
 **Events** -- The container's jaiph runs in `--raw` mode: it spawns the runtime with inherited stdio, so `__JAIPH_EVENT__` JSON flows directly to the container's stderr. The host CLI reads Docker's stderr pipe and renders the progress tree. stdout carries plain script output. `STEP_END` events embed `out_content` (and `err_content` on failure) so consumers do not need host paths to step artifact files.
 
-**Sandbox primitive (overlay vs. copy)** -- Selected at launch time. If `/dev/fuse` exists on the host, the CLI uses **overlay mode**: the `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. On Linux hosts, the overlay container is also launched with `--security-opt apparmor=unconfined` because the default Docker AppArmor profile (active on Ubuntu 22.04+, GitHub Actions runners, and similar) denies fuse mounts even when `SYS_ADMIN` and `/dev/fuse` are present. If `fuse-overlayfs` is missing from the image or the mount still fails at runtime, the entrypoint exits with `E_DOCKER_OVERLAY` -- there is no in-container fallback. Set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to opt into copy mode instead.
+**Sandbox primitive (overlay vs. copy)** -- Selected at launch time. If `/dev/fuse` exists on the host, the CLI uses **overlay mode**: the `overlay-run.sh` wrapper (generated by the host CLI and mounted read-only) sets up `fuse-overlayfs` with the ro bind mount (`/jaiph/workspace-ro`) as the lower layer and a tmpfs as the upper layer, merged at `/jaiph/workspace`. All workspace writes go to the tmpfs and are discarded on container exit. On Linux hosts, the overlay container is also launched with `--security-opt apparmor=unconfined` because the default Docker AppArmor profile (active on Ubuntu 22.04+, GitHub Actions runners, and similar) denies fuse mounts even when `SYS_ADMIN` and `/dev/fuse` are present. If `fuse-overlayfs` is missing from the image or the mount still fails at runtime, the entrypoint exits with `E_DOCKER_OVERLAY` -- there is no in-container fallback. Set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to opt into copy mode instead. Custom images used in overlay mode must ensure `/jaiph/workspace` is mountable by root (the official image keeps this path root-owned).
 
 If `/dev/fuse` is missing on the host, the CLI uses **copy mode**: before launching the container it clones the workspace into `<runs-root>/.sandbox-<id>/` (excluding `.jaiph/runs`) using `cp -cR` on macOS (APFS clonefile, O(1) per file) or `cp -pR` elsewhere (a real copy; a single stderr warning is printed when the fast path is unavailable). The clone is bind-mounted rw at `/jaiph/workspace`. After the container exits the clone is removed unless `JAIPH_DOCKER_KEEP_SANDBOX=1` is set, in which case the path is left in place and printed to stderr for debugging.
 
diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 3ff37fdd..46b90452 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -447,15 +447,15 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup",
   }
 });
 
-test("writeOverlayScript: contains no setpriv / chown / JAIPH_HOST_UID dance", () => {
-  // Mountpoint perms (1777) in the image let the container run as the host
-  // UID directly — no need for root + privilege drop inside the container.
+test("writeOverlayScript: contains no setpriv/chown host-uid dance", () => {
   const scriptPath = writeOverlayScript();
   try {
     const content = readFileSync(scriptPath, "utf8");
-    assert.ok(!content.includes("setpriv"), "no setpriv: container runs as host UID from start");
-    assert.ok(!content.includes("JAIPH_HOST_UID"), "no JAIPH_HOST_UID env contract");
+    assert.ok(!content.includes("JAIPH_HOST_UID"), "no JAIPH_HOST_UID contract");
+    assert.ok(!content.includes("JAIPH_HOST_GID"), "no JAIPH_HOST_GID contract");
+    assert.ok(!content.includes("setpriv"), "no in-container privilege drop");
     assert.ok(!content.includes("chown"), "no in-container chown");
+    assert.ok(!content.includes("allow_other"), "no allow_other needed");
   } finally {
     rmSync(dirname(scriptPath), { recursive: true, force: true });
   }
@@ -642,14 +642,12 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile
   assert.equal(args[secOptIdx + 1], "no-new-privileges");
 });
 
-test("buildDockerArgs: overlay mode adds only SYS_ADMIN (no SETUID/SETGID/CHOWN)", () => {
-  // Container runs as host UID throughout — no in-container privilege drop,
-  // no chown. The image's 1777 mountpoint lets fuse-overlayfs mount as host UID.
+test("buildDockerArgs: overlay mode adds only SYS_ADMIN", () => {
   const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
   const capAddValues = args
     .map((v, i) => (v === "--cap-add" ? args[i + 1] : null))
     .filter((v): v is string => v !== null);
-  assert.deepStrictEqual(capAddValues, ["SYS_ADMIN"], "only SYS_ADMIN, used by fuse-overlayfs to mount directly");
+  assert.deepStrictEqual(capAddValues, ["SYS_ADMIN"], "only SYS_ADMIN is required");
 });
 
 test("buildDockerArgs: copy mode adds no caps", () => {
@@ -766,38 +764,35 @@ test("buildDockerArgs: throws when overlay mode is selected without script path"
 // buildDockerArgs: UID/GID handling (Linux only)
 // ---------------------------------------------------------------------------
 
-test("buildDockerArgs: both modes run as host UID:GID on Linux (no privilege-drop dance)", () => {
+test("buildDockerArgs: overlay mode runs as root and does not inject JAIPH_HOST_UID/GID (Linux)", () => {
   if (process.platform !== "linux") return;
+  const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+  const userIdx = args.indexOf("--user");
+  assert.ok(userIdx >= 0, "--user flag present");
+  assert.equal(args[userIdx + 1], "0:0", "overlay starts as root so fuse-overlayfs can mount /jaiph/workspace");
 
-  const overlayArgs = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
-  const overlayUserIdx = overlayArgs.indexOf("--user");
-  assert.ok(overlayUserIdx >= 0, "overlay: --user flag present");
-  assert.notEqual(overlayArgs[overlayUserIdx + 1], "0:0", "overlay: container does NOT run as root");
-  assert.match(overlayArgs[overlayUserIdx + 1], /^\d+:\d+$/, "overlay: --user is host uid:gid");
-
-  const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
-  try {
-    const copyArgs = buildDockerArgs(copyOpts(cloneDir));
-    const copyUserIdx = copyArgs.indexOf("--user");
-    assert.ok(copyUserIdx >= 0, "copy: --user flag present");
-    assert.equal(copyArgs[copyUserIdx + 1], overlayArgs[overlayUserIdx + 1], "copy and overlay use the same host UID:GID");
-  } finally {
-    rmSync(cloneDir, { recursive: true, force: true });
-  }
+  const envFlags = args
+    .map((v, i) => (v === "-e" ? args[i + 1] : null))
+    .filter((v): v is string => v !== null);
+  assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no JAIPH_HOST_UID env");
+  assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no JAIPH_HOST_GID env");
 });
 
-test("buildDockerArgs: no JAIPH_HOST_UID/GID env injected (no privilege-drop contract)", () => {
-  const overlayArgs = buildDockerArgs(defaultOpts(), TEST_OVERLAY);
+test("buildDockerArgs: copy mode runs as host UID:GID directly (Linux)", () => {
+  if (process.platform !== "linux") return;
   const cloneDir = mkdtempSync(join(tmpdir(), "jaiph-test-clone-"));
   try {
-    const copyArgs = buildDockerArgs(copyOpts(cloneDir));
-    for (const args of [overlayArgs, copyArgs]) {
-      const envFlags = args
-        .map((v, i) => (v === "-e" ? args[i + 1] : null))
-        .filter((v): v is string => v !== null);
-      assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no JAIPH_HOST_UID env");
-      assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no JAIPH_HOST_GID env");
-    }
+    const args = buildDockerArgs(copyOpts(cloneDir));
+    const userIdx = args.indexOf("--user");
+    assert.ok(userIdx >= 0, "--user flag present");
+    assert.notEqual(args[userIdx + 1], "0:0", "copy mode runs as host UID, not root");
+    assert.match(args[userIdx + 1], /^\d+:\d+$/, "copy mode --user is uid:gid");
+
+    const envFlags = args
+      .map((v, i) => (v === "-e" ? args[i + 1] : null))
+      .filter((v): v is string => v !== null);
+    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_UID=")), "no JAIPH_HOST_UID env in copy mode");
+    assert.ok(!envFlags.some((v) => v.startsWith("JAIPH_HOST_GID=")), "no JAIPH_HOST_GID env in copy mode");
   } finally {
     rmSync(cloneDir, { recursive: true, force: true });
   }
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 7143fb7b..817b5749 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -598,11 +598,7 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
 
   args.push("--cap-drop", "ALL");
   if (mode === "overlay") {
-    // SYS_ADMIN is the only cap we add: fuse-overlayfs needs it to mount the
-    // union filesystem directly (without going through fusermount3's suid
-    // path, which would be neutered by --security-opt no-new-privileges
-    // anyway). The workflow doesn't need any of these — they're only used by
-    // the entrypoint script during the overlay setup.
+    // SYS_ADMIN lets fuse-overlayfs mount the union filesystem.
     args.push("--cap-add", "SYS_ADMIN");
   }
   args.push("--security-opt", "no-new-privileges");
@@ -620,20 +616,23 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath?: st
     }
   }
 
-  // UID/GID strategy (Linux): always run the container as the host user so
-  // writes to bind mounts (cloned workspace in copy mode, /jaiph/run in both
-  // modes) end up owned by the host user. This works in overlay mode too
-  // because the official image's `/jaiph/workspace` mountpoint is mode 1777,
-  // giving any UID write access — required by fuse-overlayfs to mount there.
+  // UID/GID strategy (Linux):
+  //   copy mode    → --user host_uid:host_gid directly.
+  //   overlay mode → --user 0:0 so fuse-overlayfs can mount on /jaiph/workspace.
+  //                  The workflow runs as root inside the container in this mode.
   // macOS Docker Desktop translates UIDs across the VM boundary, so we don't
   // override --user there.
   if (process.platform === "linux") {
-    try {
-      const uid = execSync("id -u", { encoding: "utf8" }).trim();
-      const gid = execSync("id -g", { encoding: "utf8" }).trim();
-      args.push("--user", `${uid}:${gid}`);
-    } catch {
-      // Fall through without --user.
+    if (mode === "overlay") {
+      args.push("--user", "0:0");
+    } else {
+      try {
+        const uid = execSync("id -u", { encoding: "utf8" }).trim();
+        const gid = execSync("id -g", { encoding: "utf8" }).trim();
+        args.push("--user", `${uid}:${gid}`);
+      } catch {
+        // Fall through without --user.
+      }
     }
   }
 

From f3b0d27fc8a5a8e9e36bd564994c9f66c96dca1f Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:49:05 +0200
Subject: [PATCH 35/38] Always publish new docker image

Signed-off-by: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a119bec1..0d2cd293 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -204,7 +204,7 @@ jobs:
 
   docker-publish:
     name: Publish Docker runtime image
-    needs: [test, e2e, docs-local, e2e-wsl]
+    # needs: [test, e2e, docs-local, e2e-wsl]
     if: github.ref == 'refs/heads/nightly' || startsWith(github.ref, 'refs/tags/v')
     runs-on: ubuntu-latest
     permissions:

From ac5a06f3b882a96f02c9ca08b9364f3ca98a183d Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 10:50:09 +0200
Subject: [PATCH 36/38] runtime(docker): make overlay run dir writable on
 userns hosts

In Linux overlay mode, chmod the host run-directory bind mount to 0777 before docker run so user-namespace remapped container root can still create run artifacts under /jaiph/run.

Made-with: Cursor
---
 src/runtime/docker.test.ts |  6 ++++++
 src/runtime/docker.ts      | 14 +++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts
index 46b90452..f0bb2a67 100644
--- a/src/runtime/docker.test.ts
+++ b/src/runtime/docker.test.ts
@@ -487,6 +487,12 @@ test("spawnDockerProcess: stdin ignored, stdout+stderr piped for events", () =>
   );
 });
 
+test("spawnDockerProcess: Linux overlay mode chmods sandbox run dir for userns-remap compatibility", () => {
+  const src = readFileSync(join(__dirname, "docker.ts"), "utf8");
+  assert.ok(src.includes("mode === \"overlay\""), "guarded to overlay mode");
+  assert.ok(src.includes("chmodSync(opts.sandboxRunDir, 0o777)"), "run dir chmod present");
+});
+
 // ---------------------------------------------------------------------------
 // resolveDockerConfig: imageExplicit
 // ---------------------------------------------------------------------------
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index 817b5749..cbd0c8d9 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -1,5 +1,5 @@
 import { execFileSync, execSync, spawn, spawnSync, ChildProcess } from "node:child_process";
-import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
+import { chmodSync, existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
 import { randomBytes } from "node:crypto";
 import { tmpdir } from "node:os";
 import { join, resolve, dirname, relative } from "node:path";
@@ -746,6 +746,18 @@ export function spawnDockerProcess(opts: DockerSpawnOptions): DockerSpawnResult
 
   const mode: SandboxMode = opts.sandboxMode ?? selectSandboxMode(opts.env);
   mkdirSync(opts.sandboxRunDir, { recursive: true });
+  // Linux overlay mode runs as container root. Some hosts run Docker with
+  // user-namespace remapping, where container root is not host root and cannot
+  // create entries in a 0755 host-owned bind mount. Make the run dir
+  // world-writable so artifacts remain writable regardless of UID mapping.
+  if (process.platform === "linux" && mode === "overlay") {
+    try {
+      chmodSync(opts.sandboxRunDir, 0o777);
+    } catch {
+      // Best effort: if chmod fails, docker run may still succeed on hosts
+      // without user-namespace remapping.
+    }
+  }
 
   let overlayScriptPath: string | undefined;
   let overlayScriptDir: string | undefined;

From 68edf37d4d1136706075f1a5a2bd2db7c3368236 Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 11:04:52 +0200
Subject: [PATCH 37/38] runtime/e2e: hard-drop legacy .jaiph/Dockerfile path

Remove remaining runtime helper and E2E/init/docs references to .jaiph/Dockerfile so Docker execution only uses configured/default images with no implicit Dockerfile build behavior.

Made-with: Cursor
---
 docs/sandboxing.md                          |   1 -
 e2e/test_all.sh                             |   1 -
 e2e/tests/00_install_and_init.sh            |   5 -
 e2e/tests/73_docker_dockerfile_detection.sh | 141 --------------------
 src/cli/commands/init.test.ts               |  10 --
 src/runtime/docker.ts                       |  23 ----
 6 files changed, 181 deletions(-)
 delete mode 100644 e2e/tests/73_docker_dockerfile_detection.sh

diff --git a/docs/sandboxing.md b/docs/sandboxing.md
index ee45cc29..c69ed0ae 100644
--- a/docs/sandboxing.md
+++ b/docs/sandboxing.md
@@ -187,7 +187,6 @@ Docker-related errors use `E_DOCKER_*` codes for programmatic detection:
 |------------|---------|----------|
 | `E_DOCKER_NOT_FOUND` | `docker info` fails (Docker not installed or daemon not running) | Run exits immediately. No fallback to local execution. |
 | `E_DOCKER_PULL` | `docker pull` fails (network error, image not found, auth failure) | Run exits. Check registry access and image name. |
-| `E_DOCKER_BUILD` | `docker build` fails when using helpers that build from a Dockerfile | Not used during normal `jaiph run` image resolution. |
 | `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. |
 | `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. |
 | `E_DOCKER_OVERLAY` | Overlay mode selected but `fuse-overlayfs` is missing from the image or the mount fails inside the container | Container exits with code 78. Use the official runtime image, install `fuse-overlayfs` in your custom image, or set `JAIPH_DOCKER_NO_OVERLAY=1` on the host to switch to copy mode. The CLI already passes `--security-opt apparmor=unconfined` on Linux to defeat the default AppArmor fuse-deny; remaining failures usually mean the host kernel itself blocks fuse mounts (rootless docker without the right user-namespace setup, locked-down kernel, etc.). |
diff --git a/e2e/test_all.sh b/e2e/test_all.sh
index 105245a2..b6a70955 100755
--- a/e2e/test_all.sh
+++ b/e2e/test_all.sh
@@ -23,7 +23,6 @@ TEST_SCRIPTS=(
   "e2e/tests/70_run_artifacts.sh"
   "e2e/tests/71_loop_run_artifacts.sh"
   "e2e/tests/72_docker_run_artifacts.sh"
-  "e2e/tests/73_docker_dockerfile_detection.sh"
   "e2e/tests/74_live_step_output.sh"
   "e2e/tests/78_lang_redesign_constructs.sh"
   "e2e/tests/79_workflow_fail_keyword.sh"
diff --git a/e2e/tests/00_install_and_init.sh b/e2e/tests/00_install_and_init.sh
index 279db221..0b607f04 100644
--- a/e2e/tests/00_install_and_init.sh
+++ b/e2e/tests/00_install_and_init.sh
@@ -68,11 +68,6 @@ e2e::pass "bootstrap template matches expected triple-quoted prompt content"
 jaiph compile "${BOOTSTRAP_FILE}"
 e2e::pass "generated bootstrap workflow compiles"
 
-if [[ -f "${TEST_DIR}/.jaiph/Dockerfile" ]]; then
-  e2e::fail "Expected jaiph init not to create .jaiph/Dockerfile"
-fi
-e2e::pass "jaiph init does not create .jaiph/Dockerfile"
-
 # Bash command substitution strips a trailing newline; compare bytes with cmp.
 if ! cmp -s "${TEST_DIR}/.jaiph/.gitignore" <(printf 'runs\ntmp\n'); then
   e2e::fail "Expected .jaiph/.gitignore to list runs and tmp with a final newline"
diff --git a/e2e/tests/73_docker_dockerfile_detection.sh b/e2e/tests/73_docker_dockerfile_detection.sh
deleted file mode 100644
index 638d429b..00000000
--- a/e2e/tests/73_docker_dockerfile_detection.sh
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env bash
-
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
-source "${ROOT_DIR}/e2e/lib/common.sh"
-trap e2e::cleanup EXIT
-
-e2e::prepare_test_env "docker_dockerfile_detection"
-TEST_DIR="${JAIPH_E2E_TEST_DIR}"
-
-# Gate on Docker availability — skip gracefully when Docker is not installed.
-if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
-  e2e::section "docker dockerfile detection (skipped — Docker unavailable)"
-  e2e::skip "Docker is not available, skipping Dockerfile detection tests"
-  exit 0
-fi
-
-# Build the E2E test image (used for explicit-image tests below).
-if ! e2e::ensure_docker_test_image; then
-  e2e::section "docker dockerfile detection (skipped — test image build failed)"
-  e2e::skip "Could not build local Docker test image"
-  exit 0
-fi
-
-e2e::section "docker — invalid .jaiph/Dockerfile is not built on run"
-
-# Given: a syntactically invalid .jaiph/Dockerfile. If `jaiph run` tried to build it,
-# the run would fail with E_DOCKER_BUILD. The driver must use the default GHCR image instead.
-mkdir -p "${TEST_DIR}/.jaiph"
-printf '%s\n' 'THIS IS NOT A VALID DOCKERFILE' > "${TEST_DIR}/.jaiph/Dockerfile"
-
-e2e::file "dockerfile_ignored.jh" <<'EOF'
-script ping_impl = ```
-echo "pulled default image ok"
-```
-rule ping() {
-  run ping_impl()
-}
-
-workflow default() {
-  ensure ping()
-}
-EOF
-
-# When: Docker enabled, implicit default image (pull ghcr.io/jaiphlang/jaiph-runtime:<version>)
-if ! JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/dockerfile_ignored.jh" >/dev/null 2>&1; then
-  JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/dockerfile_ignored.jh" || true
-  e2e::fail "docker: run should use pulled default image, not build broken .jaiph/Dockerfile"
-fi
-
-run_dir="$(e2e::run_dir "dockerfile_ignored.jh")"
-e2e::expect_run_file "dockerfile_ignored.jh" "000003-script__ping_impl.out" "pulled default image ok"
-e2e::pass "docker: broken .jaiph/Dockerfile is ignored; default runtime image is used"
-
-e2e::section "docker — explicit image with present .jaiph/Dockerfile"
-
-# Given: same workspace with invalid .jaiph/Dockerfile, explicit image set
-e2e::file "dockerfile_skip.jh" <<'EOF'
-script check_no_marker_impl = ```
-if test -f /jaiph-runtime-marker; then
-  echo "marker unexpectedly found"
-  exit 1
-fi
-echo "no marker"
-```
-rule check_no_marker() {
-  run check_no_marker_impl()
-}
-
-workflow default() {
-  ensure check_no_marker()
-}
-EOF
-
-# When: run with Docker enabled AND explicit image (should skip Dockerfile)
-JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/dockerfile_skip.jh" >/dev/null 2>&1
-
-# Then: the marker file should NOT exist (E2E test image, not custom build)
-e2e::expect_run_file "dockerfile_skip.jh" "000003-script__check_no_marker_impl.out" "no marker"
-e2e::pass "docker: explicit image used; .jaiph/Dockerfile not built"
-
-e2e::section "docker — workspace without .jaiph/Dockerfile uses configured image"
-
-# Given: a separate test dir without .jaiph/Dockerfile, using the E2E test image
-fallback_dir="$(mktemp -d "${JAIPH_E2E_WORK_DIR}/docker_fallback.XXXXXX")"
-cat > "${fallback_dir}/fallback.jh" <<'EOF'
-script greet_impl = ```
-echo "hello fallback"
-```
-rule greet() {
-  run greet_impl()
-}
-
-workflow default() {
-  ensure greet()
-}
-EOF
-
-# When: run with Docker enabled and explicit E2E image (no .jaiph/Dockerfile present)
-JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_WORKSPACE="${fallback_dir}" jaiph run "${fallback_dir}/fallback.jh" >/dev/null 2>&1
-
-# Then: should succeed using the configured image
-fallback_run_dir="$(e2e::run_dir_at "${fallback_dir}/.jaiph/runs" "fallback.jh")"
-fallback_summary="${fallback_run_dir}run_summary.jsonl"
-e2e::assert_file_exists "${fallback_summary}" "docker: fallback run_summary.jsonl exists"
-e2e::pass "docker: falls back to configured image without .jaiph/Dockerfile"
-
-e2e::section "docker dockerfile detection — agent env vars are forwarded"
-
-# Given: a workflow that checks visibility of agent env vars
-e2e::file "envforward.jh" <<'EOF'
-script check_env_impl = ```
-echo "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-unset}"
-echo "CURSOR_SESSION=${CURSOR_SESSION:-unset}"
-```
-rule check_env() {
-  run check_env_impl()
-}
-
-workflow default() {
-  ensure check_env()
-}
-EOF
-
-# When: run with agent env vars set on host
-JAIPH_DOCKER_ENABLED=true \
-  JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" \
-  ANTHROPIC_API_KEY="test-key-123" \
-  CURSOR_SESSION="test-session-456" \
-  jaiph run "${TEST_DIR}/envforward.jh" >/dev/null 2>&1
-
-# Then: agent env vars are forwarded to the container (ANTHROPIC_*, CURSOR_* prefixes)
-run_dir="$(e2e::run_dir "envforward.jh")"
-out_content="$(<"${run_dir}000003-script__check_env_impl.out")"
-# assert_contains: script .out may include additional env vars or runtime-injected lines
-e2e::assert_contains "${out_content}" "ANTHROPIC_API_KEY=test-key-123" "docker: ANTHROPIC_API_KEY forwarded"
-# assert_contains: script .out may include additional env vars or runtime-injected lines
-e2e::assert_contains "${out_content}" "CURSOR_SESSION=test-session-456" "docker: CURSOR_SESSION forwarded"
-
-rm -rf "${fallback_dir}"
diff --git a/src/cli/commands/init.test.ts b/src/cli/commands/init.test.ts
index 2d87e43f..3602130d 100644
--- a/src/cli/commands/init.test.ts
+++ b/src/cli/commands/init.test.ts
@@ -49,16 +49,6 @@ test("init: generated bootstrap uses triple-quoted prompt and parses", () => {
   }
 });
 
-test("init: does not create .jaiph/Dockerfile", () => {
-  const dir = makeTempDir();
-  try {
-    assert.equal(runInit([dir]), 0);
-    assert.equal(existsSync(join(dir, ".jaiph", "Dockerfile")), false);
-  } finally {
-    rmSync(dir, { recursive: true, force: true });
-  }
-});
-
 test("init: fails when .jaiph/.gitignore exists with unexpected content", () => {
   const dir = makeTempDir();
   try {
diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts
index cbd0c8d9..4ac9d60a 100644
--- a/src/runtime/docker.ts
+++ b/src/runtime/docker.ts
@@ -229,29 +229,6 @@ export function pullImageIfNeeded(image: string): void {
   }
 }
 
-// ---------------------------------------------------------------------------
-// Dockerfile-based image build
-// ---------------------------------------------------------------------------
-
-const DOCKERFILE_IMAGE_TAG = "jaiph-runtime:latest";
-
-/**
- * Build a Docker image from a Dockerfile and tag it.
- * Throws on build failure.
- */
-export function buildImageFromDockerfile(dockerfilePath: string, tag: string = DOCKERFILE_IMAGE_TAG): string {
-  const contextDir = dirname(dockerfilePath);
-  try {
-    execSync(`docker build -t ${tag} -f ${dockerfilePath} ${contextDir}`, {
-      stdio: "inherit",
-      timeout: 600_000,
-    });
-  } catch {
-    throw new Error(`E_DOCKER_BUILD failed to build image from "${dockerfilePath}"`);
-  }
-  return tag;
-}
-
 function imageHasJaiph(image: string): boolean {
   try {
     execFileSync(

From 4ddde517c3dbb3dcb3c30d9204aab3a9f7cd22cf Mon Sep 17 00:00:00 2001
From: Jakub Dzikowski <jakub.t.dzikowski@gmail.com>
Date: Tue, 21 Apr 2026 11:12:54 +0200
Subject: [PATCH 38/38] ci: self-heal WSL E2E by provisioning Ubuntu

Provision an Ubuntu WSL distro on windows-latest when none is preinstalled so E2E tests always run instead of being skipped, and fail loudly if provisioning cannot complete.

Made-with: Cursor
---
 .github/workflows/ci.yml | 48 ++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0d2cd293..9dee6387 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -165,42 +165,56 @@ jobs:
         id: detect_wsl
         shell: pwsh
         run: |
+          $ciDistro = "jaiph-ci-ubuntu"
           $distros = @(wsl -l -q | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
           if ($distros.Count -eq 0) {
-            "distro=" >> $env:GITHUB_OUTPUT
-            Write-Warning "No WSL distro is available on this runner. Skipping WSL E2E."
-            exit 0
+            Write-Warning "No WSL distro is available on this runner. Importing Ubuntu rootfs for CI."
+            $archivePath = Join-Path $env:RUNNER_TEMP "ubuntu-base-24.04.tar.gz"
+            $installPath = Join-Path $env:RUNNER_TEMP "wsl-ubuntu"
+            $ubuntuBaseUrl = "https://cdimage.ubuntu.com/ubuntu-base/releases/24.04/release/ubuntu-base-24.04-base-amd64.tar.gz"
+
+            if (Test-Path $installPath) {
+              Remove-Item -Path $installPath -Recurse -Force
+            }
+            New-Item -ItemType Directory -Path $installPath -Force | Out-Null
+            Invoke-WebRequest -Uri $ubuntuBaseUrl -OutFile $archivePath
+            wsl --import "$ciDistro" "$installPath" "$archivePath" --version 2
+            $distros = @("$ciDistro")
           }
           $ubuntu = $distros | Where-Object { $_ -match "^Ubuntu" } | Select-Object -First 1
           $selected = if ($ubuntu) { $ubuntu } else { $distros[0] }
+          if (-not $selected) {
+            Write-Error "Failed to provision a WSL distro for CI."
+            exit 1
+          }
           "distro=$selected" >> $env:GITHUB_OUTPUT
           Write-Host "Using WSL distro: $selected"
 
       - name: Install Node and run E2E tests in WSL
-        if: steps.detect_wsl.outputs.distro != ''
         shell: pwsh
         run: |
           $workspace = "${{ github.workspace }}"
           $distro = "${{ steps.detect_wsl.outputs.distro }}"
-          wsl -d "$distro" -- bash -lc "set -euo pipefail
+          $env:JAIPH_WORKSPACE = $workspace
+          $bashScript = @'
+          set -euo pipefail
           export DEBIAN_FRONTEND=noninteractive
           export JAIPH_UNSAFE=true
-          sudo apt-get update
-          sudo apt-get install -y curl ca-certificates
+          SUDO=
+          if [ "$(id -u)" -ne 0 ]; then
+            SUDO=sudo
+          fi
+          $SUDO apt-get update
+          $SUDO apt-get install -y curl ca-certificates
           if ! command -v node >/dev/null 2>&1; then
-            curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
-            sudo apt-get install -y nodejs
+            curl -fsSL https://deb.nodesource.com/setup_20.x | $SUDO -E bash -
+            $SUDO apt-get install -y nodejs
           fi
-          cd \"\$(wslpath '$workspace')\"
+          cd "$(wslpath "$JAIPH_WORKSPACE")"
           npm ci
           npm run test:e2e
-          "
-
-      - name: WSL E2E skipped
-        if: steps.detect_wsl.outputs.distro == ''
-        shell: pwsh
-        run: |
-          Write-Host "No WSL distro found on this runner image; skipping WSL E2E."
+          '@
+          wsl -d "$distro" -- bash -lc "$bashScript"
 
   docker-publish:
     name: Publish Docker runtime image