From 435e7aeff44e66f20b04a0d43e427cee3c8c0dbe Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 15:55:19 +0200 Subject: [PATCH 01/38] Add Docker live run-artifact regression test. Protect the host-mounted .jaiph/runs contract by asserting Docker-backed runs create and grow step .out/.err files before the workflow exits. Made-with: Cursor --- e2e/tests/75_docker_live_step_output.sh | 112 ++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 e2e/tests/75_docker_live_step_output.sh diff --git a/e2e/tests/75_docker_live_step_output.sh b/e2e/tests/75_docker_live_step_output.sh new file mode 100644 index 00000000..ee76f709 --- /dev/null +++ b/e2e/tests/75_docker_live_step_output.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +source "${ROOT_DIR}/e2e/lib/common.sh" +trap e2e::cleanup EXIT + +e2e::prepare_test_env "docker_live_step_output" +TEST_DIR="${JAIPH_E2E_TEST_DIR}" + +# Gate on Docker availability — skip gracefully when Docker is not installed. +if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then + e2e::section "docker live step output (skipped — Docker unavailable)" + e2e::skip "Docker is not available, skipping Docker live artifact test" + exit 0 +fi + +e2e::section "docker step .out/.err files grow live during execution" + +e2e::file "live_out_docker.jh" <<'WORKFLOW' +script slow_writer_impl = ``` +echo "line-1" +echo "err-1" >&2 +sleep 1 +echo "line-2" +echo "err-2" >&2 +sleep 1 +echo "line-3" +echo "err-3" >&2 +``` +rule slow_writer() { + run slow_writer_impl() +} + +workflow default() { + ensure slow_writer() +} +WORKFLOW + +run_err="$(mktemp)" +JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/live_out_docker.jh" 2>"${run_err}" & +run_pid=$! 
+ +out_file="" +err_file="" +for _ in $(seq 1 50); do + sleep 0.1 + shopt -s nullglob + out_candidates=( "${TEST_DIR}/.jaiph/runs/"*/*"live_out_docker.jh/"*slow_writer_impl.out ) + err_candidates=( "${TEST_DIR}/.jaiph/runs/"*/*"live_out_docker.jh/"*slow_writer_impl.err ) + shopt -u nullglob + if [[ ${#out_candidates[@]} -ge 1 && ${#err_candidates[@]} -ge 1 ]]; then + out_file="${out_candidates[0]}" + err_file="${err_candidates[0]}" + break + fi +done + +sleep 1 +if ! kill -0 "$run_pid" 2>/dev/null; then + e2e::fail "docker run finished before live sample; increase slow_writer duration" +fi + +mid_out_size="" +mid_err_size="" +if [[ -n "$out_file" && -f "$out_file" ]]; then + mid_out_size="$(wc -c < "$out_file")" +fi +if [[ -n "$err_file" && -f "$err_file" ]]; then + mid_err_size="$(wc -c < "$err_file")" +fi + +wait "$run_pid" || true +rm -f "${run_err}" + +if [[ -z "$out_file" || -z "$err_file" ]]; then + e2e::fail "docker out/err files never appeared during execution" +fi + +if [[ -z "$mid_out_size" || "$mid_out_size" -eq 0 ]]; then + e2e::fail "docker out file was empty when sampled mid-execution (mid_out_size=${mid_out_size:-})" +fi +if [[ -z "$mid_err_size" || "$mid_err_size" -eq 0 ]]; then + e2e::fail "docker err file was empty when sampled mid-execution (mid_err_size=${mid_err_size:-})" +fi + +final_out_size="$(wc -c < "$out_file")" +if [[ "$final_out_size" -gt "$mid_out_size" ]]; then + e2e::pass "docker out file grew live: mid=${mid_out_size}B final=${final_out_size}B" +elif [[ "$final_out_size" -eq "$mid_out_size" && "$final_out_size" -gt 0 ]]; then + e2e::pass "docker out file was live-written (sampled ${mid_out_size}B, final ${final_out_size}B)" +else + e2e::fail "docker out file did not grow (mid=${mid_out_size}B final=${final_out_size}B)" +fi + +final_err_size="$(wc -c < "$err_file")" +if [[ "$final_err_size" -gt "$mid_err_size" ]]; then + e2e::pass "docker err file grew live: mid=${mid_err_size}B final=${final_err_size}B" +elif [[ "$final_err_size" 
-eq "$mid_err_size" && "$final_err_size" -gt 0 ]]; then + e2e::pass "docker err file was live-written (sampled ${mid_err_size}B, final ${final_err_size}B)" +else + e2e::fail "docker err file did not grow (mid=${mid_err_size}B final=${final_err_size}B)" +fi + +final_content="$(<"$out_file")" +expected_out="$(printf 'line-1\nline-2\nline-3')" +e2e::assert_equals "${final_content}" "${expected_out}" "docker final .out content" + +final_err_content="$(<"$err_file")" +expected_err="$(printf 'err-1\nerr-2\nerr-3')" +e2e::assert_equals "${final_err_content}" "${expected_err}" "docker final .err content" From 81e9aa3904e5a02ee366378d9dd03237dc50c278 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 20:38:27 +0200 Subject: [PATCH 02/38] Fix explicit nested managed calls in Docker runs. Keep nested run/ensure calls explicit across validation, formatting, and runtime execution, and make Docker use the local Jaiph package with a writable workspace fallback so container behavior matches local runs. 
Made-with: Cursor --- .jaiph/architect_review.jh | 2 +- QUEUE.md | 90 +++++++++++++- src/cli/commands/run.ts | 4 +- src/format/emit.ts | 18 +++ src/runtime/docker.test.ts | 26 ++-- src/runtime/docker.ts | 123 +++++++++++++++++-- src/runtime/kernel/node-workflow-runtime.ts | 108 +++++++++++++++- src/transpile/validate-managed-calls.test.ts | 23 ++++ src/transpile/validate.ts | 60 +++++++++ test/sample-build.test.ts | 34 +++++ 10 files changed, 457 insertions(+), 31 deletions(-) diff --git a/.jaiph/architect_review.jh b/.jaiph/architect_review.jh index a85f59e0..22fa919b 100755 --- a/.jaiph/architect_review.jh +++ b/.jaiph/architect_review.jh @@ -102,7 +102,7 @@ workflow review_one_header(header) { const verdict = run first_line_str(packed) const updated_description = run rest_lines_str(packed) const body_file = run jaiph_review_body_file() - run mkdir_p_simple(run, jaiph_tmp_dir()) + run mkdir_p_simple(run jaiph_tmp_dir()) run str_equals(verdict, "dev-ready") catch (err) { run arg_nonempty(updated_description) catch (err) { fail "needs-work requires a non-empty updated_description (questions for the author)." diff --git a/QUEUE.md b/QUEUE.md index 5e70340b..7f3cd348 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,6 +12,93 @@ Process rules: *** +## Runtime/DSL — require explicit managed calls inside nested argument lists #dev-ready + +**Goal** +Keep managed execution explicit everywhere, including inside argument lists. 
Nested argument-position managed calls are valid **only** when they use the correct keyword: + +* `run foo(run bar())` +* `run foo(ensure rule_bar())` +* `run foo(run \`echo "aaa"\`())` + +The bare call-like forms must stay invalid: + +* `run foo(bar())` +* `run foo(rule_bar())` +* `run foo(\`echo "aaa"\`())` +* `const x = bar()` + +The explicit capture-then-pass form is also valid: + +* `const x = run bar()` +* `run foo(x)` + +This is a deliberate language rule: scripts/workflows are only executable via `run`, and rules are only executable via `ensure`, even when nested inside another call's args. + +**Why this task exists** +The runtime bug that treated `run foo(run bar())` as two literal argv tokens (`"run"`, `"bar()"`) proves the language intent is not being enforced correctly. The fix is to support **explicit** nested managed calls and reject **implicit** bare call-like execution. + +**Context** + +* Parser / call-arg parsing: `src/parse/core.ts`, `src/parse/workflows.ts`, `src/parse/steps.ts` +* Validation: `src/transpile/validate.ts`, `src/transpile/validate-managed-calls.test.ts` +* Runtime arg handling: `src/runtime/kernel/node-workflow-runtime.ts` +* Formatting / round-trip emit: `src/format/emit.ts` +* Real motivating workflow: `.jaiph/architect_review.jh` + +**Implementation requirements** + +1. **Language rule** + * Managed steps remain explicit everywhere. + * Inside call arguments, nested execution is allowed only via: + * `run ref(...)` + * `ensure ref(...)` + * `run \`...\`()` + * Bare nested call-like forms are invalid and must not be interpreted as executable. + +2. **Compiler behavior** + * Accept explicit nested managed calls in arg position. + * Reject bare nested call-like syntax in arg position with a clear parse/validate error. + * Reject bare call-like execution in `const` assignments as well. + * Error message must tell the user to add the missing `run` / `ensure` keyword, not to rely on implicit execution. + +3. 
**Runtime behavior** + * Runtime arg evaluation must execute explicit nested managed calls correctly and pass their resulting value as a single argument. + * Runtime must never interpret a bare call-like token as executable work. + +4. **Formatter / emitter** + * Ensure formatting round-trips valid nested explicit forms like `run foo(run bar())`. + * Ensure invalid bare nested call-like forms are not generated by the emitter. + +5. **Workflow update** + * Keep or update `.jaiph/architect_review.jh` to use the explicit nested form: + * `run mkdir_p_simple(run jaiph_tmp_dir())` + +6. **Tests** + * Add/keep a regression test proving `run mkdir_p_simple(jaiph_tmp_dir())` is rejected. + * Add/keep a regression test proving `run foo(rule_bar())` is rejected. + * Add/keep a regression test proving `run foo(\`echo "aaa"\`())` is rejected. + * Add/keep a regression test proving `const x = bar()` is rejected. + * Add/keep a positive test for `run mkdir_p_simple(run jaiph_tmp_dir())`. + * Add/keep a positive test for `run foo(ensure rule_bar())`. + * Add/keep a positive test for nested inline script form `run foo(run \`echo "aaa"\`())`. + * Add/keep a positive test for `const x = run bar()` followed by `run foo(x)`. + +**Acceptance criteria** + +* `run foo(run bar())` succeeds and passes the nested result as one argument. +* `run foo(ensure rule_bar())` succeeds and passes the nested result as one argument. +* `run foo(run \`echo "aaa"\`())` succeeds and passes the nested result as one argument. +* `run foo(bar())` fails at compile time with an actionable error. +* `run foo(rule_bar())` fails at compile time with an actionable error. +* `run foo(\`echo "aaa"\`())` fails at compile time with an actionable error. +* `const x = run bar()` is accepted. +* `const x = bar()` fails at compile time with an actionable error. +* `.jaiph/architect_review.jh` uses the explicit nested form. +* Tests prevent regression back to implicit bare nested execution. 
+ +*** + ## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR **Goal** @@ -232,4 +319,5 @@ Extend the language with workflow parameters: `workflow analyze(file: string, de * Errors produce `isError: true` responses (no server crash). * E2E test passes. -*** \ No newline at end of file +*** + diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts index 570f02d5..1a87ceab 100644 --- a/src/cli/commands/run.ts +++ b/src/cli/commands/run.ts @@ -381,7 +381,9 @@ function reportResult( } } const runtimeDebugEnabled = runtimeEnv.JAIPH_DEBUG === "true"; - const runtimeErrorPrinted = hasFatalRuntimeStderr(capturedStderr, runtimeDebugEnabled); + const runtimeErrorPrinted = sandboxRunDir + ? false + : hasFatalRuntimeStderr(capturedStderr, runtimeDebugEnabled); const resolvedStatus = exitStatus !== 0 || runtimeErrorPrinted ? 1 : 0; emitter.emit("workflow_end", { diff --git a/src/format/emit.ts b/src/format/emit.ts index 30a9e6c4..bd7c7e08 100644 --- a/src/format/emit.ts +++ b/src/format/emit.ts @@ -13,6 +13,7 @@ import type { WorkflowMetadata, TopLevelEmitOrder, } from "../types"; +import { parseCallRef } from "../parse/core"; export interface EmitOptions { indent: number; @@ -378,6 +379,23 @@ function formatArgs(args: string, bareIdentifierArgs?: string[]): string { while (i < args.length) { while (i < args.length && (args[i] === " " || args[i] === "\t")) i++; if (i >= args.length) break; + const tail = args.slice(i); + const keyword = tail.startsWith("run ") + ? "run" + : tail.startsWith("ensure ") + ? "ensure" + : null; + if (keyword) { + const afterKeyword = args.slice(i + keyword.length).trimStart(); + const skipped = args.slice(i + keyword.length).length - afterKeyword.length; + const call = parseCallRef(afterKeyword); + if (call && (call.rest.length === 0 || /^\s/.test(call.rest))) { + const consumed = afterKeyword.length - call.rest.length; + tokens.push(`${keyword} ${call.ref}(${formatArgs(call.args ?? 
"", call.bareIdentifierArgs)})`); + i += keyword.length + skipped + consumed; + continue; + } + } if (args[i] === '"') { let j = i + 1; while (j < args.length && !(args[j] === '"' && args[j - 1] !== "\\")) j++; diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts index ce422847..9afbe728 100644 --- a/src/runtime/docker.test.ts +++ b/src/runtime/docker.test.ts @@ -207,7 +207,7 @@ test("resolveDockerConfig: workspace from in-file", () => { // buildDockerArgs // --------------------------------------------------------------------------- -test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device", () => { +test("buildDockerArgs: workspace-ro + sandbox run rw + fuse device", () => { const opts = defaultOpts({ runArgs: ["arg1"] }); const args = buildDockerArgs(opts, TEST_OVERLAY); @@ -223,15 +223,11 @@ test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device" const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v"); - // Workspace ro - const wsMount = vFlags.find((v) => v.includes("/jaiph/workspace:")); - assert.ok(wsMount, "workspace mount present"); - assert.ok(wsMount!.endsWith(":ro"), "workspace must be ro"); - // Overlay lower-layer ro const wsRoMount = vFlags.find((v) => v.includes("/jaiph/workspace-ro:")); assert.ok(wsRoMount, "workspace-ro mount present"); assert.ok(wsRoMount!.endsWith(":ro"), "workspace-ro must be ro"); + assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace:")), "workspace mount must stay writable inside image"); // Sandbox run dir rw const runMount = vFlags.find((v) => v.includes("/jaiph/run:")); @@ -243,8 +239,8 @@ test("buildDockerArgs: workspace ro + overlay-ro + sandbox run rw + fuse device" assert.ok(overlayMount, "overlay script mount present"); assert.ok(overlayMount!.endsWith(":ro"), "overlay script must be ro"); - // Total: 2 workspace (primary + -ro) + 1 run + 1 overlay script = 4 - assert.equal(vFlags.length, 4); + // Total: 1 workspace-ro + 1 run + 1 
overlay script = 3 + assert.equal(vFlags.length, 3); // Command: overlay-run.sh → jaiph run --raw assert.ok(args.includes("/jaiph/overlay-run.sh")); @@ -290,7 +286,7 @@ test("buildDockerArgs: overrides JAIPH_WORKSPACE and JAIPH_RUNS_DIR", () => { assert.ok(!args.some((a) => a === "JAIPH_RUNS_DIR=/host/runs")); }); -test("buildDockerArgs: multiple workspace mounts all forced ro", () => { +test("buildDockerArgs: multiple workspace mounts only lower-layer paths are mounted ro", () => { const opts = defaultOpts({ config: { ...defaultOpts().config, @@ -302,11 +298,11 @@ test("buildDockerArgs: multiple workspace mounts all forced ro", () => { }); const args = buildDockerArgs(opts, TEST_OVERLAY); const vFlags = args.filter((_, i) => i > 0 && args[i - 1] === "-v"); - // 2 configured × 2 (primary + -ro) + 1 run + 1 overlay script = 6 - assert.equal(vFlags.length, 6); - assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace:") && v.endsWith(":ro"))); + // 2 configured lower-layer mounts + 1 run + 1 overlay script = 4 + assert.equal(vFlags.length, 4); + assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace:") && v.endsWith(":ro"))); assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace-ro:") && v.endsWith(":ro"))); - assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace/config:") && v.endsWith(":ro"))); + assert.ok(!vFlags.some((v) => v.includes("/jaiph/workspace/config:") && v.endsWith(":ro"))); assert.ok(vFlags.some((v) => v.includes("/jaiph/workspace-ro/config:") && v.endsWith(":ro"))); }); @@ -413,6 +409,10 @@ test("writeOverlayScript: creates executable script with fuse-overlayfs setup", const content = readFileSync(scriptPath, "utf8"); assert.ok(content.startsWith("#!/usr/bin/env bash")); assert.ok(content.includes("fuse-overlayfs")); + assert.ok(content.includes("workspace overlay unavailable")); + assert.ok(content.includes("using copy fallback")); + assert.ok(content.includes('rsync -a --delete "$LOWER"/ "$MERGED"/')); + 
assert.ok(content.includes("mktemp \"$MERGED/.jaiph-overlay-probe.XXXXXX\"")); assert.ok(content.includes('exec "$@"')); } finally { rmSync(dirname(scriptPath), { recursive: true, force: true }); diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index be76a754..3382c1a3 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -201,10 +201,14 @@ function installedPackageRoot(): string { function autoRuntimeImageTag(baseImage: string, packageRoot: string): string { const packageJsonPath = join(packageRoot, "package.json"); const cliPath = join(packageRoot, "dist", "src", "cli.js"); + const dockerRuntimePath = join(packageRoot, "dist", "src", "runtime", "docker.js"); + const nodeWorkflowRuntimePath = join(packageRoot, "dist", "src", "runtime", "kernel", "node-workflow-runtime.js"); const packageStamp = existsSync(packageJsonPath) ? statSync(packageJsonPath).mtimeMs : 0; const cliStamp = existsSync(cliPath) ? statSync(cliPath).mtimeMs : 0; + const dockerRuntimeStamp = existsSync(dockerRuntimePath) ? statSync(dockerRuntimePath).mtimeMs : 0; + const nodeWorkflowRuntimeStamp = existsSync(nodeWorkflowRuntimePath) ? statSync(nodeWorkflowRuntimePath).mtimeMs : 0; const digest = createHash("sha256") - .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}`) + .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}|${dockerRuntimeStamp}|${nodeWorkflowRuntimeStamp}`) .digest("hex") .slice(0, 12); return `${AUTO_RUNTIME_IMAGE_REPO}:${digest}`; @@ -223,6 +227,40 @@ function imageHasJaiph(image: string): boolean { } } +function imageConfiguredUser(image: string): string | undefined { + try { + const raw = execFileSync( + "docker", + ["image", "inspect", image, "--format", "{{json .Config.User}}"], + { encoding: "utf8", timeout: 30_000 }, + ).trim(); + const parsed = JSON.parse(raw) as string; + return parsed.length > 0 ? 
parsed : undefined; + } catch { + return undefined; + } +} + +function imageHomeDir(image: string): string | undefined { + try { + const raw = execFileSync( + "docker", + ["image", "inspect", image, "--format", "{{json .Config.Env}}"], + { encoding: "utf8", timeout: 30_000 }, + ).trim(); + const envList = JSON.parse(raw) as string[]; + for (const entry of envList) { + if (entry.startsWith("HOME=")) { + const value = entry.slice("HOME=".length); + return value.length > 0 ? value : undefined; + } + } + } catch { + // Fall through. + } + return undefined; +} + function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: string, tag: string): string { const contextDir = mkdtempSync(join(tmpdir(), "jaiph-runtime-image-")); try { @@ -234,12 +272,19 @@ function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: strin if (!tarballName) { throw new Error("npm pack produced no tarball"); } + const originalUser = imageConfiguredUser(baseImage); + const originalHome = imageHomeDir(baseImage); writeFileSync( join(contextDir, "Dockerfile"), [ `FROM ${baseImage}`, + `USER root`, `COPY ${tarballName} /tmp/${tarballName}`, - `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}`, + `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}` + + (originalHome + ? ` && JAIPH_NPM_BIN="$(npm prefix -g)/bin/jaiph" && mkdir -p ${originalHome}/.local/bin && ln -sf "$JAIPH_NPM_BIN" ${originalHome}/.local/bin/jaiph` + : ""), + ...(originalUser ? 
[`USER ${originalUser}`] : []), "", ].join("\n"), ); @@ -255,6 +300,18 @@ function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: strin } } +function ensureLocalRuntimeImage(baseImage: string): string { + pullImageIfNeeded(baseImage); + const packageRoot = installedPackageRoot(); + const tag = autoRuntimeImageTag(baseImage, packageRoot); + try { + execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 }); + return tag; + } catch { + return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag); + } +} + function ensureImageHasJaiph(baseImage: string): string { pullImageIfNeeded(baseImage); if (imageHasJaiph(baseImage)) { @@ -285,6 +342,7 @@ export function resolveImage(config: DockerRunConfig, workspaceRoot: string): st if (existsSync(dockerfilePath)) { baseImage = buildImageFromDockerfile(dockerfilePath); } + return ensureLocalRuntimeImage(baseImage); } return ensureImageHasJaiph(baseImage); } @@ -299,9 +357,51 @@ LOWER=/jaiph/workspace-ro UPPER=/tmp/overlay-upper WORK=/tmp/overlay-work MERGED=/jaiph/workspace -mkdir -p "$UPPER" "$WORK" +mkdir -p "$UPPER" "$WORK" "$MERGED" +overlay_ok=0 +overlay_reason="" if command -v fuse-overlayfs >/dev/null 2>&1 && [ -e /dev/fuse ]; then - fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/dev/null || true + if fuse-overlayfs -o "lowerdir=$LOWER,upperdir=$UPPER,workdir=$WORK" "$MERGED" 2>/tmp/jaiph-fuse-overlay.err; then + probe_path="$(mktemp "$MERGED/.jaiph-overlay-probe.XXXXXX" 2>/dev/null || true)" + if [ -n "$probe_path" ]; then + rm -f "$probe_path" + overlay_ok=1 + else + overlay_reason="fuse-overlayfs mounted but workspace is still not writable" + fi + else + overlay_reason="$(tr '\n' ' ' /dev/null 2>&1; then + if rsync -a --delete "$LOWER"/ "$MERGED"/ 2>/tmp/jaiph-workspace-copy.err; then + printf 'jaiph docker: workspace overlay unavailable; using copy fallback at /jaiph/workspace' >&2 + if [ -n "$overlay_reason" ]; then + printf ' (%s)' 
"$overlay_reason" >&2 + fi + printf '\n' >&2 + overlay_ok=1 + else + copy_reason="$(tr '\n' ' ' &2 + if [ -n "$overlay_reason" ]; then + printf ' (%s)' "$overlay_reason" >&2 + fi + if [ -n "$copy_reason" ]; then + printf ' [copy fallback: %s]' "$copy_reason" >&2 + fi + printf '\n' >&2 + fi + else + printf 'jaiph docker: workspace overlay unavailable and rsync copy fallback is unavailable; /jaiph/workspace may be incomplete' >&2 + if [ -n "$overlay_reason" ]; then + printf ' (%s)' "$overlay_reason" >&2 + fi + printf '\n' >&2 + fi fi exec "$@" `; @@ -386,13 +486,13 @@ export function overlayMountPath(containerPath: string): string { * Build the `docker run --rm` argument list. * * Mounts: - * 1. workspace → /jaiph/workspace:ro (fallback when overlay absent) - * 2. workspace → /jaiph/workspace-ro:ro (overlay lower layer) - * 3. sandboxRunDir → /jaiph/run:rw (single run artifacts) + * 1. workspace → /jaiph/workspace-ro:ro (overlay lower layer / copy source) + * 2. sandboxRunDir → /jaiph/run:rw (single run artifacts) * - * overlay-run.sh (baked in image) creates a fuse-overlayfs CoW at - * /jaiph/workspace using -ro as lower. /jaiph/run is outside the overlay - * so writes go directly to the host mount — no symlink needed. + * The image already contains a writable `/jaiph/workspace` directory. + * `overlay-run.sh` mounts `fuse-overlayfs` there when available; otherwise it + * copies the lower layer into that directory as a writable fallback. `/jaiph/run` + * is outside the overlay, so run artifacts still persist to the host mount. * * The container runs `jaiph run --raw ` using its own installed jaiph. */ @@ -415,10 +515,9 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str args.push("--network", opts.config.network); } - // Workspace: ro at primary path (fallback) + overlay lower layer path + // Workspace inputs: mounted only at the overlay lower-layer path. 
for (const mount of opts.config.mounts) { const hostAbs = resolve(opts.workspaceRoot, mount.hostPath); - args.push("-v", `${hostAbs}:${mount.containerPath}:ro`); args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`); } diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts index abc01561..c6ee76ad 100644 --- a/src/runtime/kernel/node-workflow-runtime.ts +++ b/src/runtime/kernel/node-workflow-runtime.ts @@ -12,6 +12,7 @@ import { buildStepDisplayParamPairs } from "../../cli/commands/format-params.js" import { resolveRuleRef, resolveScriptRef, resolveWorkflowRef, type RuntimeGraph } from "./graph"; import type { WorkflowMetadata } from "../../types"; import { extractJson, validateFields } from "./schema"; +import { parseCallRef } from "../../parse/core"; import { plainMultilineOrchestrationForRuntime, tripleQuotedRawForRuntime, @@ -159,6 +160,79 @@ function parseArgsRaw(raw: string, vars: Map, env?: NodeJS.Proce return out; } +type ParsedArgToken = + | { kind: "literal"; value: string } + | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string }; + +function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken; next: number } | null { + const tail = raw.slice(start); + const keyword = tail.startsWith("run ") + ? "run" + : tail.startsWith("ensure ") + ? "ensure" + : null; + if (!keyword) return null; + const afterKeyword = raw.slice(start + keyword.length).trimStart(); + const skipped = raw.slice(start + keyword.length).length - afterKeyword.length; + const call = parseCallRef(afterKeyword); + if (!call) return null; + if (call.rest.length > 0 && !/^\s/.test(call.rest)) return null; + const consumed = afterKeyword.length - call.rest.length; + return { + token: { + kind: "managed", + managedKind: keyword, + ref: call.ref, + argsRaw: call.args ?? 
"", + }, + next: start + keyword.length + skipped + consumed, + }; +} + +function parseArgTokens(raw: string): ParsedArgToken[] { + if (!raw.trim()) return []; + const out: ParsedArgToken[] = []; + let i = 0; + while (i < raw.length) { + while (i < raw.length && /\s/.test(raw[i]!)) i += 1; + if (i >= raw.length) break; + const managed = parseManagedArgAt(raw, i); + if (managed) { + out.push(managed.token); + i = managed.next; + continue; + } + let cur = ""; + let quote: "'" | '"' | null = null; + while (i < raw.length) { + const ch = raw[i]!; + if (quote) { + if (ch === quote) { + quote = null; + } else { + cur += ch; + } + i += 1; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + i += 1; + continue; + } + if (/\s/.test(ch)) { + break; + } + cur += ch; + i += 1; + } + if (cur.length > 0) { + out.push({ kind: "literal", value: cur }); + } + } + return out; +} + function stripOuterQuotes(value: string): string { if (value.length >= 2) { const first = value[0]; @@ -1223,8 +1297,32 @@ export class NodeWorkflowRuntime { return `${filePath}::${name}`; } + private async resolveArgsRaw(scope: Scope, raw: string | string[]): Promise { + if (Array.isArray(raw)) { + return raw; + } + const tokens = parseArgTokens(raw); + const resolved: string[] = []; + for (const token of tokens) { + if (token.kind === "literal") { + resolved.push(interpolate(token.value, scope.vars, scope.env)); + continue; + } + const result = token.managedKind === "run" + ? await this.executeRunRef(scope, token.ref, token.argsRaw) + : await this.executeEnsureRef(scope, token.ref, token.argsRaw, undefined); + if (result.status !== 0) { + return result; + } + resolved.push(result.returnValue ?? result.output.trim()); + } + return resolved; + } + private async executeRunRef(scope: Scope, ref: string, argsRaw: string | string[]): Promise { - const args = Array.isArray(argsRaw) ? 
argsRaw : parseArgsRaw(argsRaw, scope.vars, scope.env); + const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw); + if (!Array.isArray(resolvedArgs)) return resolvedArgs; + const args = resolvedArgs; const resolvedWorkflow = resolveWorkflowRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } }); if (resolvedWorkflow) { const mk = this.mockKey(resolvedWorkflow.filePath, resolvedWorkflow.workflow.name); @@ -1263,7 +1361,9 @@ export class NodeWorkflowRuntime { argsRaw: string, recover: EnsureRecover | undefined, ): Promise { - const args = parseArgsRaw(argsRaw, scope.vars, scope.env); + const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw); + if (!Array.isArray(resolvedArgs)) return resolvedArgs; + const args = resolvedArgs; const attempt = async (): Promise => { const resolvedRule = resolveRuleRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } }); if (!resolvedRule) return { status: 1, output: "", error: `Unknown ensure target: ${ref}` }; @@ -1353,7 +1453,9 @@ export class NodeWorkflowRuntime { shebang: string | undefined, argsRaw: string, ): Promise { - const args = parseArgsRaw(argsRaw, scope.vars, scope.env); + const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw); + if (!Array.isArray(resolvedArgs)) return resolvedArgs; + const args = resolvedArgs; const scriptName = inlineScriptName(body, shebang); return this.executeManagedStep( "script", diff --git a/src/transpile/validate-managed-calls.test.ts b/src/transpile/validate-managed-calls.test.ts index 828a4890..d5bb238b 100644 --- a/src/transpile/validate-managed-calls.test.ts +++ b/src/transpile/validate-managed-calls.test.ts @@ -184,6 +184,29 @@ test("bare identifier arg: unknown name fails E_VALIDATE", () => { } }); +test("E_VALIDATE: nested call-like arg requires explicit run or ensure", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-call-")); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script mkdir_p_simple = `mkdir 
-p "$1"`', + 'script jaiph_tmp_dir = `printf "%s\\n" "$JAIPH_WORKSPACE/.jaiph/tmp"`', + "workflow default() {", + " run mkdir_p_simple(jaiph_tmp_dir())", + "}", + "", + ].join("\n"), + ); + assert.throws( + () => buildScripts(join(root, "m.jh"), join(root, "out")), + /nested managed calls in argument position must be explicit/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + test("bare identifier arg: capture variable passes validation", () => { const root = mkdtempSync(join(tmpdir(), "jaiph-val-bare-cap-")); const out = join(root, "out"); diff --git a/src/transpile/validate.ts b/src/transpile/validate.ts index edb2b747..5b3d18ba 100644 --- a/src/transpile/validate.ts +++ b/src/transpile/validate.ts @@ -257,6 +257,53 @@ function validateBareIdentifierArgs( } } +function stripQuotedArgContent(args: string): string { + let out = ""; + let quote: "'" | '"' | null = null; + for (let i = 0; i < args.length; i += 1) { + const ch = args[i]!; + if (quote) { + if (ch === quote && args[i - 1] !== "\\") { + quote = null; + } + out += " "; + continue; + } + if (ch === "'" || ch === '"') { + quote = ch; + out += " "; + continue; + } + out += ch; + } + return out; +} + +function validateNestedManagedCallArgs( + filePath: string, + loc: { line: number; col: number }, + args: string | undefined, +): void { + if (!args) return; + const stripped = stripQuotedArgContent(args); + const re = /\b([A-Za-z_][A-Za-z0-9_.]*)\s*\(/g; + let match: RegExpExecArray | null; + while ((match = re.exec(stripped)) !== null) { + const before = stripped.slice(0, match.index).trimEnd(); + const lastToken = before.length === 0 ? 
"" : before.slice(before.lastIndexOf(" ") + 1); + if (lastToken === "run" || lastToken === "ensure") { + continue; + } + throw jaiphError( + filePath, + loc.line, + loc.col, + "E_VALIDATE", + `nested managed calls in argument position must be explicit; use "run ${match[1]}(...)" or "ensure ${match[1]}(...)" inside the argument list`, + ); + } +} + /** Resolve a route target workflow ref to its declared parameter count. Returns undefined if unresolvable. */ function resolveRouteTargetParams( ref: string, @@ -474,6 +521,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void } if (s.type === "ensure") { validateNoShellRedirection(ast.filePath, s.ref.loc, "ensure", s.args); + validateNestedManagedCallArgs(ast.filePath, s.ref.loc, s.args); validateRef(s.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, s.ref.loc, s.ref.value, s.args, "rule", ast, refCtx); @@ -488,6 +536,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void } if (s.type === "run") { validateNoShellRedirection(ast.filePath, s.workflow.loc, "run", s.args); + validateNestedManagedCallArgs(ast.filePath, s.workflow.loc, s.args); if (s.async) { throw jaiphError( ast.filePath, @@ -572,12 +621,14 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void if (s.managed) { if (s.managed.kind === "run") { validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "run", s.managed.args); + validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args); validateRef(s.managed.ref, ast, refCtx, expectRunInRuleRef); validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "workflow", ast, refCtx); validateBareIdentifierArgs(ast.filePath, s.managed.ref.loc, s.managed.bareIdentifierArgs, ruleKnownVars); } else if (s.managed.kind === "ensure") { validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "ensure", s.managed.args); + validateNestedManagedCallArgs(ast.filePath, 
s.managed.ref.loc, s.managed.args); validateRef(s.managed.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "rule", ast, refCtx); @@ -610,6 +661,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void const v = s.value; if (v.kind === "run_capture") { validateNoShellRedirection(ast.filePath, v.ref.loc, "run", v.args); + validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args); if (!v.ref.value.includes(".") && ruleKnownVars.has(v.ref.value) && !localScripts.has(v.ref.value)) { throw jaiphError(ast.filePath, v.ref.loc.line, v.ref.loc.col, "E_VALIDATE", `strings are not executable; "${v.ref.value}" is a string — use a script instead`); } @@ -619,6 +671,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void validateBareIdentifierArgs(ast.filePath, v.ref.loc, v.bareIdentifierArgs, ruleKnownVars); } else if (v.kind === "ensure_capture") { validateNoShellRedirection(ast.filePath, v.ref.loc, "ensure", v.args); + validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args); validateRef(v.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, v.ref.loc, v.ref.value, v.args, "rule", ast, refCtx); @@ -765,6 +818,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void validateChannelRef(s.channel, s.loc); if (s.rhs.kind === "run") { validateNoShellRedirection(ast.filePath, s.rhs.ref.loc, "run", s.rhs.args); + validateNestedManagedCallArgs(ast.filePath, s.rhs.ref.loc, s.rhs.args); validateRef(s.rhs.ref, ast, refCtx, expectRunTargetRef); validateArity(ast.filePath, s.rhs.ref.loc, s.rhs.ref.value, s.rhs.args, "workflow", ast, refCtx); @@ -799,6 +853,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void } if (s.type === "ensure") { validateNoShellRedirection(ast.filePath, s.ref.loc, "ensure", s.args); + validateNestedManagedCallArgs(ast.filePath, s.ref.loc, s.args); 
validateRef(s.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, s.ref.loc, s.ref.value, s.args, "rule", ast, refCtx); @@ -813,6 +868,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void } if (s.type === "run") { validateNoShellRedirection(ast.filePath, s.workflow.loc, "run", s.args); + validateNestedManagedCallArgs(ast.filePath, s.workflow.loc, s.args); if (!s.workflow.value.includes(".") && wfKnownVars.has(s.workflow.value) && !localScripts.has(s.workflow.value) && !localWorkflows.has(s.workflow.value)) { throw jaiphError(ast.filePath, s.workflow.loc.line, s.workflow.loc.col, "E_VALIDATE", `strings are not executable; "${s.workflow.value}" is a string — use a script instead`); } @@ -899,12 +955,14 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void if (s.managed) { if (s.managed.kind === "run") { validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "run", s.managed.args); + validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args); validateRef(s.managed.ref, ast, refCtx, expectRunTargetRef); validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "workflow", ast, refCtx); validateBareIdentifierArgs(ast.filePath, s.managed.ref.loc, s.managed.bareIdentifierArgs, wfKnownVars, recoverBindings); } else if (s.managed.kind === "ensure") { validateNoShellRedirection(ast.filePath, s.managed.ref.loc, "ensure", s.managed.args); + validateNestedManagedCallArgs(ast.filePath, s.managed.ref.loc, s.managed.args); validateRef(s.managed.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, s.managed.ref.loc, s.managed.ref.value, s.managed.args, "rule", ast, refCtx); @@ -957,6 +1015,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void const v = s.value; if (v.kind === "run_capture") { validateNoShellRedirection(ast.filePath, v.ref.loc, "run", v.args); + validateNestedManagedCallArgs(ast.filePath, v.ref.loc, 
v.args); if (!v.ref.value.includes(".") && wfKnownVars.has(v.ref.value) && !localScripts.has(v.ref.value) && !localWorkflows.has(v.ref.value)) { throw jaiphError(ast.filePath, v.ref.loc.line, v.ref.loc.col, "E_VALIDATE", `strings are not executable; "${v.ref.value}" is a string — use a script instead`); } @@ -966,6 +1025,7 @@ export function validateReferences(ast: jaiphModule, ctx: ValidateContext): void validateBareIdentifierArgs(ast.filePath, v.ref.loc, v.bareIdentifierArgs, wfKnownVars, recoverBindings); } else if (v.kind === "ensure_capture") { validateNoShellRedirection(ast.filePath, v.ref.loc, "ensure", v.args); + validateNestedManagedCallArgs(ast.filePath, v.ref.loc, v.args); validateRef(v.ref, ast, refCtx, expectRuleRef); validateArity(ast.filePath, v.ref.loc, v.ref.value, v.args, "rule", ast, refCtx); diff --git a/test/sample-build.test.ts b/test/sample-build.test.ts index 06d7f89e..2d8439bd 100644 --- a/test/sample-build.test.ts +++ b/test/sample-build.test.ts @@ -173,6 +173,40 @@ test("jaiph run compiles and executes workflow with args", () => { } }); +test("jaiph run resolves nested managed call arguments", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-run-nested-args-")); + try { + const filePath = join(root, "nested_args.jh"); + writeFileSync( + filePath, + [ + "script mkdir_p_simple = ```", + 'mkdir -p "$1"', + "```", + "script jaiph_tmp_dir = ```", + 'printf "%s\\n" "$JAIPH_WORKSPACE/.jaiph/tmp"', + "```", + "workflow default() {", + " run mkdir_p_simple(run jaiph_tmp_dir())", + "}", + "", + ].join("\n"), + ); + + const cliPath = join(process.cwd(), "dist/src/cli.js"); + const runResult = spawnSync("node", [cliPath, "run", filePath], { + encoding: "utf8", + cwd: root, + env: { ...process.env, JAIPH_DOCKER_ENABLED: "false" }, + }); + + assert.equal(runResult.status, 0, runResult.stderr); + assert.equal(existsSync(join(root, ".jaiph", "tmp")), true); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + 
test("executable .jh invokes jaiph run semantics", () => { const root = mkdtempSync(join(tmpdir(), "jaiph-exec-jh-")); try { From d25fb3f8dd89ad1a5001af381cbf5cfed34ba9d9 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 21:07:56 +0200 Subject: [PATCH 03/38] Feat: Require explicit managed calls in nested argument lists Enforce that nested call-like expressions inside argument positions must use an explicit `run` or `ensure` keyword. Bare call-like forms (`run foo(bar())`, `run foo(rule_bar())`, `run foo(\`echo x\`())`, `const x = bar()`) are now rejected at compile time with actionable error messages. The explicit forms (`run foo(run bar())`, `run foo(ensure rule_bar())`, `run foo(run \`echo x\`())`) execute the nested call first and pass the result as a single argument. Validator extended with inline script detection, runtime evaluates managed argument tokens before outer dispatch, and the formatter round-trips all valid nested forms. Regression tests cover all accepted and rejected patterns. Docs and grammar updated. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 4 + QUEUE.md | 88 ----------- docs/grammar.md | 34 +++- docs/jaiph-skill.md | 2 +- docs/language.md | 30 ++++ src/format/emit.ts | 37 +++++ src/runtime/docker.ts | 21 ++- src/runtime/kernel/node-workflow-runtime.ts | 89 +++++++++-- src/transpile/validate-managed-calls.test.ts | 157 +++++++++++++++++++ src/transpile/validate.ts | 17 ++ 10 files changed, 371 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eda20425..cd5409d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Unreleased +## All changes + +- **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. 
The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`). + # 0.9.2 ## Summary diff --git a/QUEUE.md b/QUEUE.md index 7f3cd348..993c8f50 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,93 +12,6 @@ Process rules: *** -## Runtime/DSL — require explicit managed calls inside nested argument lists #dev-ready - -**Goal** -Keep managed execution explicit everywhere, including inside argument lists. 
Nested argument-position managed calls are valid **only** when they use the correct keyword: - -* `run foo(run bar())` -* `run foo(ensure rule_bar())` -* `run foo(run \`echo "aaa"\`())` - -The bare call-like forms must stay invalid: - -* `run foo(bar())` -* `run foo(rule_bar())` -* `run foo(\`echo "aaa"\`())` -* `const x = bar()` - -The explicit capture-then-pass form is also valid: - -* `const x = run bar()` -* `run foo(x)` - -This is a deliberate language rule: scripts/workflows are only executable via `run`, and rules are only executable via `ensure`, even when nested inside another call's args. - -**Why this task exists** -The runtime bug that treated `run foo(run bar())` as two literal argv tokens (`"run"`, `"bar()"`) proves the language intent is not being enforced correctly. The fix is to support **explicit** nested managed calls and reject **implicit** bare call-like execution. - -**Context** - -* Parser / call-arg parsing: `src/parse/core.ts`, `src/parse/workflows.ts`, `src/parse/steps.ts` -* Validation: `src/transpile/validate.ts`, `src/transpile/validate-managed-calls.test.ts` -* Runtime arg handling: `src/runtime/kernel/node-workflow-runtime.ts` -* Formatting / round-trip emit: `src/format/emit.ts` -* Real motivating workflow: `.jaiph/architect_review.jh` - -**Implementation requirements** - -1. **Language rule** - * Managed steps remain explicit everywhere. - * Inside call arguments, nested execution is allowed only via: - * `run ref(...)` - * `ensure ref(...)` - * `run \`...\`()` - * Bare nested call-like forms are invalid and must not be interpreted as executable. - -2. **Compiler behavior** - * Accept explicit nested managed calls in arg position. - * Reject bare nested call-like syntax in arg position with a clear parse/validate error. - * Reject bare call-like execution in `const` assignments as well. - * Error message must tell the user to add the missing `run` / `ensure` keyword, not to rely on implicit execution. - -3. 
**Runtime behavior** - * Runtime arg evaluation must execute explicit nested managed calls correctly and pass their resulting value as a single argument. - * Runtime must never interpret a bare call-like token as executable work. - -4. **Formatter / emitter** - * Ensure formatting round-trips valid nested explicit forms like `run foo(run bar())`. - * Ensure invalid bare nested call-like forms are not generated by the emitter. - -5. **Workflow update** - * Keep or update `.jaiph/architect_review.jh` to use the explicit nested form: - * `run mkdir_p_simple(run jaiph_tmp_dir())` - -6. **Tests** - * Add/keep a regression test proving `run mkdir_p_simple(jaiph_tmp_dir())` is rejected. - * Add/keep a regression test proving `run foo(rule_bar())` is rejected. - * Add/keep a regression test proving `run foo(\`echo "aaa"\`())` is rejected. - * Add/keep a regression test proving `const x = bar()` is rejected. - * Add/keep a positive test for `run mkdir_p_simple(run jaiph_tmp_dir())`. - * Add/keep a positive test for `run foo(ensure rule_bar())`. - * Add/keep a positive test for nested inline script form `run foo(run \`echo "aaa"\`())`. - * Add/keep a positive test for `const x = run bar()` followed by `run foo(x)`. - -**Acceptance criteria** - -* `run foo(run bar())` succeeds and passes the nested result as one argument. -* `run foo(ensure rule_bar())` succeeds and passes the nested result as one argument. -* `run foo(run \`echo "aaa"\`())` succeeds and passes the nested result as one argument. -* `run foo(bar())` fails at compile time with an actionable error. -* `run foo(rule_bar())` fails at compile time with an actionable error. -* `run foo(\`echo "aaa"\`())` fails at compile time with an actionable error. -* `const x = run bar()` is accepted. -* `const x = bar()` fails at compile time with an actionable error. -* `.jaiph/architect_review.jh` uses the explicit nested form. -* Tests prevent regression back to implicit bare nested execution. 
- -*** - ## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR **Goal** @@ -320,4 +233,3 @@ Extend the language with workflow parameters: `workflow analyze(file: string, de * E2E test passes. *** - diff --git a/docs/grammar.md b/docs/grammar.md index 68e10383..d3707263 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -195,6 +195,33 @@ This rule applies to all call sites: `run`, `ensure`, `return run`/`return ensur Bare identifiers must reference a known variable (`const`, capture, or named parameter). Unknown names produce an `E_VALIDATE` error at compile time. Jaiph keywords (`run`, `ensure`, `const`, etc.) cannot be used as bare identifier arguments. +### Nested Managed Calls in Arguments + +Call arguments can contain **explicit nested managed calls** using `run` or `ensure`. The nested call executes first and its result is passed as a single argument to the outer call. This is a deliberate language rule: managed execution must always be explicit — scripts and workflows execute only via `run`, rules only via `ensure`, even inside argument lists. + +**Valid explicit forms:** + +```jaiph +run mkdir_p_simple(run jaiph_tmp_dir()) # nested run +run do_work(ensure check_ok()) # nested ensure +run do_work(run `echo aaa`()) # nested inline script +``` + +**Invalid bare call-like forms** — rejected at compile time with actionable errors: + +```jaiph +# run do_work(bar()) — E_VALIDATE: nested managed calls must be explicit +# run do_work(rule_bar()) — E_VALIDATE: nested managed calls must be explicit +# run do_work(`echo aaa`()) — E_VALIDATE: nested inline scripts must be explicit +``` + +The **capture-then-pass** form is always valid: + +```jaiph +const x = run bar() +run foo(x) +``` + ### Arity Checking When the callee declares named parameters, the compiler validates that the number of arguments at the call site matches the number of declared parameters. 
A mismatch produces an `E_VALIDATE` error: @@ -841,6 +868,10 @@ ensure_stmt = "ensure" call_ref [ "catch" catch_bindings catch_body ] ; run_catch_stmt = "run" call_ref "catch" catch_bindings catch_body ; run_stmt = "run" ( call_ref | inline_script ) ; call_ref = REF "(" [ call_args ] ")" ; (* parentheses always required *) +call_arg = double_quoted_string | IDENT | "${" IDENT "}" + | "run" ( call_ref | inline_script ) (* explicit nested managed call *) + | "ensure" call_ref ; (* explicit nested ensure *) +call_args = call_arg { "," call_arg } ; inline_script = backtick_script_body "(" [ call_args ] ")" | fenced_script_block "(" [ call_args ] ")" ; prompt_body = double_quoted_string | IDENT | triple_quoted_block ; triple_quoted_block = "\"\"\"" newline { body_line newline } "\"\"\"" ; @@ -861,7 +892,7 @@ After parsing, the compiler validates references and config (`src/transpile/vali - **E_PARSE:** Invalid syntax — duplicate config, invalid keys/values, `$(…)` or `${var:-fallback}` in orchestration strings, `${...}` interpolation in script bodies, `prompt … returns` without capture, bare `ref(args)` in const RHS (use `run`/`ensure`/`prompt`), `local` at top level, unrecognized workflow/rule line, invalid send RHS, arguments after `catch`, bare `catch` with no recovery step, nested inline captures, shell redirection after `run`/`ensure`, invalid parameter names (non-identifier, duplicate, or reserved keyword), or missing `{` on definition line. - **E_SCHEMA:** Invalid `returns` schema — empty, non-flat, unsupported type (only `string`, `number`, `boolean`). 
-- **E_VALIDATE:** Reference errors — unknown rule/workflow, duplicate alias, `ensure` on non-rule, `run` on rule, `run` to workflow inside rule, `run async` in rule, forbidden Jaiph usage inside `$(…)`, dot notation on non-prompt variable or invalid field name, bare identifier argument referencing an unknown variable, `${identifier}` in strings referencing an unknown variable, standalone `"${identifier}"` in call arguments (use bare identifier instead), arity mismatch (call-site argument count differs from callee's declared parameter count), **type crossing** — `prompt` with a script name (`scripts are not promptable`), `run` with a string const (`strings are not executable`), `const x = scriptName` (`scripts are not values`), `${scriptName}` interpolation (`scripts cannot be interpolated`). +- **E_VALIDATE:** Reference errors — unknown rule/workflow, duplicate alias, `ensure` on non-rule, `run` on rule, `run` to workflow inside rule, `run async` in rule, forbidden Jaiph usage inside `$(…)`, dot notation on non-prompt variable or invalid field name, bare identifier argument referencing an unknown variable, `${identifier}` in strings referencing an unknown variable, standalone `"${identifier}"` in call arguments (use bare identifier instead), arity mismatch (call-site argument count differs from callee's declared parameter count), **bare nested managed calls** — `run foo(bar())` or `run foo(rule_bar())` without explicit `run`/`ensure` keyword, **bare nested inline script calls** — `run foo(\`echo aaa\`())` without explicit `run`, **type crossing** — `prompt` with a script name (`scripts are not promptable`), `run` with a string const (`strings are not executable`), `const x = scriptName` (`scripts are not values`), `${scriptName}` interpolation (`scripts cannot be interpolated`). - **E_IMPORT_NOT_FOUND:** Import target file does not exist. Validation rules: @@ -875,6 +906,7 @@ Validation rules: 7. 
`ensure … catch` and `run … catch` argument ordering: all arguments inside parentheses before `catch`. 8. Shell redirection (`>`, `|`, `&`) after `run`/`ensure` is rejected — use a script. 9. **Type crossing:** `string` and `script` are non-interchangeable primitive types (see [Types](#types)). `prompt` rejects script names; `run` rejects string consts; assigning a script to a `const` or interpolating a script name with `${…}` is rejected. Each crossing produces an actionable `E_VALIDATE` message. +10. **Explicit nested managed calls:** Bare call-like forms in argument position (`run foo(bar())`, `run foo(rule_bar())`) are rejected — add the missing `run` or `ensure` keyword. Bare inline script calls in arguments (`run foo(\`echo aaa\`())`) are also rejected — add `run`. Valid forms: `run foo(run bar())`, `run foo(ensure rule_bar())`, `run foo(run \`echo aaa\`())`. ## Build Artifacts {#build-artifacts} diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md index a7dd9c30..f170b6ef 100644 --- a/docs/jaiph-skill.md +++ b/docs/jaiph-skill.md @@ -95,7 +95,7 @@ Prefer composable modules over one large file. - **Module-scoped variables:** `local name = value` or `const name = value` (same value forms). Prefer **`const`** for new files. Values can be single-line `"..."` strings, triple-quoted `"""..."""` multiline strings, or bare tokens. A double-quoted string that spans multiple lines is rejected — use `"""..."""` instead. Accessible as `${name}` inside orchestration strings in the same module. Names share the unified namespace with channels, rules, workflows, and scripts — duplicates are `E_PARSE`. Not exportable; module-scoped only. - **Steps:** - **ensure** — `ensure ref` or `ensure ref([args...])` runs a rule (local or `alias.rule_name`). **Parentheses are optional when passing zero arguments** — `ensure check` is equivalent to `ensure check()`. When arguments are present, parentheses are required with comma-separated expressions. 
**Bare identifier arguments** are supported and preferred: `ensure check(status)` is equivalent to `ensure check("${status}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead. Optionally `ensure ref([args]) catch (<failure>) <body>` or `ensure ref([args]) catch (<failure>, <attempt>) <body>`: the recovery body runs **once** on failure (like a catch clause). There is no retry loop — for retries, use explicit recursion. The first binding (e.g. `failure`) receives the full merged stdout+stderr from the failed rule execution, including output from nested scripts and rules. The optional second binding (e.g. `attempt`) receives the attempt number (always `"1"`). Full output still lives in step **`.out` / `.err`** artifacts. If the failure binding is empty for your rule, persist diagnostics before prompting or assert non-empty. Works in both workflows and rules. - - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. 
Optionally `run ref([args]) catch (<failure>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above. + - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<failure>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above. - **log** — `log "message"` writes the expanded message to **stdout** and emits a **`LOG`** event; the CLI shows it in the progress tree at the current depth. 
Double-quoted string; `${identifier}` interpolation works at runtime. For multiline messages, use triple quotes: `log """..."""`. **Bare identifier form:** `log foo` (no quotes) expands to `log "${foo}"` — the variable's value is logged. Works with `const`, capture, and named parameters. **Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` execute a managed call and inline the result (e.g. `log "Got: ${run greet()}"`). Nested inline captures are rejected. **`LOG`** events and `run_summary.jsonl` store the **same** message string (JSON-escaped for the payload). No spinner, no timing — a static annotation. See [CLI Reference](cli.md) for tree formatting. Useful for marking workflow phases (e.g. `log "Starting analysis phase"`). - **logerr** — `logerr "message"` is identical to `log` except the message goes to **stderr** and the event type is **`LOGERR`**. In the progress tree, `logerr` lines use a red `!` instead of the dim `ℹ` used by `log`. Same quoting, interpolation, bare identifier, and triple-quote rules as `log` (e.g. `logerr err_msg`, `logerr """..."""`). - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (forward syntax) has been removed. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md). diff --git a/docs/language.md b/docs/language.md index 504b0872..c1a82d16 100644 --- a/docs/language.md +++ b/docs/language.md @@ -248,6 +248,36 @@ run process(task, "extra context") # mixed bare + quoted run process("${task}") # equivalent to bare form ``` +### Nested Managed Calls in Arguments + +Call arguments can contain nested managed calls — but the `run` or `ensure` keyword must be explicit. 
This is a deliberate language rule: scripts and workflows execute only via `run`, and rules execute only via `ensure`, even when nested inside another call's arguments. + +**Valid — explicit nested calls:** + +```jaiph +run mkdir_p_simple(run jaiph_tmp_dir()) +run do_work(ensure check_ok()) +run do_work(run `echo aaa`()) +``` + +The nested call executes first and its result is passed as a single argument to the outer call. + +**Invalid — bare call-like forms:** + +```jaiph +# run do_work(bar()) — E_VALIDATE: use "run bar()" or "ensure bar()" +# run do_work(rule_bar()) — E_VALIDATE: use "ensure rule_bar()" +# run do_work(`echo aaa`()) — E_VALIDATE: use "run `...`()" +# const x = bar() — E_PARSE: use "const x = run bar()" +``` + +The explicit capture-then-pass form is also valid: + +```jaiph +const x = run bar() +run foo(x) +``` + ### Arity Checking When the callee declares named parameters, the compiler validates argument count: diff --git a/src/format/emit.ts b/src/format/emit.ts index bd7c7e08..20f02a35 100644 --- a/src/format/emit.ts +++ b/src/format/emit.ts @@ -371,6 +371,33 @@ function emitSteps(steps: WorkflowStepDef[], pad: string, currentIndent: string) return lines; } +/** Try to parse `` `body`(args) `` from the start of a string. Returns consumed length or null. 
*/ +function parseInlineScriptArg(s: string): { body: string; innerArgs: string; consumed: number } | null { + if (!s.startsWith("`")) return null; + const closeIdx = s.indexOf("`", 1); + if (closeIdx === -1) return null; + const body = s.slice(1, closeIdx); + const afterClose = s.slice(closeIdx + 1); + if (!afterClose.startsWith("(")) return null; + let depth = 1; + let j = 1; + let inQuote: string | null = null; + while (j < afterClose.length && depth > 0) { + const ch = afterClose[j]; + if (inQuote) { + if (ch === inQuote && afterClose[j - 1] !== "\\") inQuote = null; + } else { + if (ch === '"' || ch === "'") inQuote = ch; + else if (ch === "(") depth++; + else if (ch === ")") depth--; + } + j++; + } + if (depth !== 0) return null; + const innerArgs = afterClose.slice(1, j - 1).trim(); + return { body, innerArgs, consumed: closeIdx + 1 + j }; +} + /** Convert space-separated args back to comma-separated format with bare identifiers. */ function formatArgs(args: string, bareIdentifierArgs?: string[]): string { const bare = new Set(bareIdentifierArgs ?? []); @@ -395,6 +422,16 @@ function formatArgs(args: string, bareIdentifierArgs?: string[]): string { i += keyword.length + skipped + consumed; continue; } + // Try inline script form: run `body`(args) + if (keyword === "run") { + const inlineResult = parseInlineScriptArg(afterKeyword); + if (inlineResult) { + const formattedInner = inlineResult.innerArgs ? 
formatArgs(inlineResult.innerArgs) : ""; + tokens.push(`run \`${inlineResult.body}\`(${formattedInner})`); + i += keyword.length + skipped + inlineResult.consumed; + continue; + } + } } if (args[i] === '"') { let j = i + 1; diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index 3382c1a3..400d6f4a 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -396,11 +396,24 @@ if [ "$overlay_ok" -ne 1 ]; then printf '\n' >&2 fi else - printf 'jaiph docker: workspace overlay unavailable and rsync copy fallback is unavailable; /jaiph/workspace may be incomplete' >&2 - if [ -n "$overlay_reason" ]; then - printf ' (%s)' "$overlay_reason" >&2 + if cp -a "$LOWER"/. "$MERGED"/ 2>/tmp/jaiph-workspace-cp.err; then + printf 'jaiph docker: workspace overlay unavailable; using cp fallback at /jaiph/workspace' >&2 + if [ -n "$overlay_reason" ]; then + printf ' (%s)' "$overlay_reason" >&2 + fi + printf '\n' >&2 + overlay_ok=1 + else + cp_reason="$(tr '\n' ' ' &2 + if [ -n "$overlay_reason" ]; then + printf ' (%s)' "$overlay_reason" >&2 + fi + if [ -n "$cp_reason" ]; then + printf ' [cp fallback: %s]' "$cp_reason" >&2 + fi + printf '\n' >&2 fi - printf '\n' >&2 fi fi exec "$@" diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts index c6ee76ad..9678840c 100644 --- a/src/runtime/kernel/node-workflow-runtime.ts +++ b/src/runtime/kernel/node-workflow-runtime.ts @@ -162,7 +162,37 @@ function parseArgsRaw(raw: string, vars: Map, env?: NodeJS.Proce type ParsedArgToken = | { kind: "literal"; value: string } - | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string }; + | { kind: "managed"; managedKind: "run" | "ensure"; ref: string; argsRaw: string } + | { kind: "managed_inline_script"; body: string; lang?: string; argsRaw: string }; + +/** Try to parse `\`body\`(args)` from a string at a given position. 
*/ +function parseInlineScriptAt(s: string): { body: string; argsRaw: string; consumed: number } | null { + const t = s.trimStart(); + const skippedWs = s.length - t.length; + if (!t.startsWith("`")) return null; + const closeIdx = t.indexOf("`", 1); + if (closeIdx === -1) return null; + const body = t.slice(1, closeIdx); + const afterClose = t.slice(closeIdx + 1); + if (!afterClose.startsWith("(")) return null; + let depth = 1; + let i = 1; + let inQuote: string | null = null; + while (i < afterClose.length && depth > 0) { + const ch = afterClose[i]; + if (inQuote) { + if (ch === inQuote && afterClose[i - 1] !== "\\") inQuote = null; + } else { + if (ch === '"' || ch === "'") inQuote = ch; + else if (ch === "(") depth++; + else if (ch === ")") depth--; + } + i++; + } + if (depth !== 0) return null; + const argsContent = afterClose.slice(1, i - 1).trim(); + return { body, argsRaw: argsContent, consumed: skippedWs + closeIdx + 1 + i }; +} function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken; next: number } | null { const tail = raw.slice(start); @@ -175,18 +205,33 @@ function parseManagedArgAt(raw: string, start: number): { token: ParsedArgToken; const afterKeyword = raw.slice(start + keyword.length).trimStart(); const skipped = raw.slice(start + keyword.length).length - afterKeyword.length; const call = parseCallRef(afterKeyword); - if (!call) return null; - if (call.rest.length > 0 && !/^\s/.test(call.rest)) return null; - const consumed = afterKeyword.length - call.rest.length; - return { - token: { - kind: "managed", - managedKind: keyword, - ref: call.ref, - argsRaw: call.args ?? "", - }, - next: start + keyword.length + skipped + consumed, - }; + if (call && (call.rest.length === 0 || /^\s/.test(call.rest))) { + const consumed = afterKeyword.length - call.rest.length; + return { + token: { + kind: "managed", + managedKind: keyword, + ref: call.ref, + argsRaw: call.args ?? 
"", + }, + next: start + keyword.length + skipped + consumed, + }; + } + // Try inline script form: run `body`(args) + if (keyword === "run") { + const inlineResult = parseInlineScriptAt(afterKeyword); + if (inlineResult) { + return { + token: { + kind: "managed_inline_script", + body: inlineResult.body, + argsRaw: inlineResult.argsRaw, + }, + next: start + keyword.length + skipped + inlineResult.consumed, + }; + } + } + return null; } function parseArgTokens(raw: string): ParsedArgToken[] { @@ -1297,6 +1342,16 @@ export class NodeWorkflowRuntime { return `${filePath}::${name}`; } + /** Synchronous fast-path: resolve args when every token is a plain literal. */ + private resolveArgsRawSync(scope: Scope, raw: string | string[]): string[] | null { + if (Array.isArray(raw)) return raw; + const tokens = parseArgTokens(raw); + for (const token of tokens) { + if (token.kind !== "literal") return null; + } + return tokens.map((t) => interpolate((t as { kind: "literal"; value: string }).value, scope.vars, scope.env)); + } + private async resolveArgsRaw(scope: Scope, raw: string | string[]): Promise { if (Array.isArray(raw)) { return raw; @@ -1308,6 +1363,12 @@ export class NodeWorkflowRuntime { resolved.push(interpolate(token.value, scope.vars, scope.env)); continue; } + if (token.kind === "managed_inline_script") { + const result = await this.executeInlineScript(scope, token.body, undefined, token.argsRaw); + if (result.status !== 0) return result; + resolved.push(result.returnValue ?? result.output.trim()); + continue; + } const result = token.managedKind === "run" ? 
await this.executeRunRef(scope, token.ref, token.argsRaw) : await this.executeEnsureRef(scope, token.ref, token.argsRaw, undefined); @@ -1320,7 +1381,7 @@ export class NodeWorkflowRuntime { } private async executeRunRef(scope: Scope, ref: string, argsRaw: string | string[]): Promise { - const resolvedArgs = await this.resolveArgsRaw(scope, argsRaw); + const resolvedArgs = this.resolveArgsRawSync(scope, argsRaw) ?? await this.resolveArgsRaw(scope, argsRaw); if (!Array.isArray(resolvedArgs)) return resolvedArgs; const args = resolvedArgs; const resolvedWorkflow = resolveWorkflowRef(this.graph, scope.filePath, { value: ref, loc: { line: 1, col: 1 } }); diff --git a/src/transpile/validate-managed-calls.test.ts b/src/transpile/validate-managed-calls.test.ts index d5bb238b..baf91ec6 100644 --- a/src/transpile/validate-managed-calls.test.ts +++ b/src/transpile/validate-managed-calls.test.ts @@ -464,3 +464,160 @@ test("E_VALIDATE: ${arg1} in log is unknown identifier", () => { rmSync(root, { recursive: true, force: true }); } }); + +// --- Explicit nested managed call tests --- + +test("buildScripts accepts run foo(run bar()) — explicit nested managed call", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-run-")); + const out = join(root, "out"); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script mkdir_p_simple = `mkdir -p "$1"`', + 'script jaiph_tmp_dir = `printf "%s\\n" "/tmp/jaiph"`', + "workflow default() {", + " run mkdir_p_simple(run jaiph_tmp_dir())", + "}", + "", + ].join("\n"), + ); + buildScripts(join(root, "m.jh"), out); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("buildScripts accepts run foo(ensure rule_bar()) — explicit nested ensure", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-ensure-")); + const out = join(root, "out"); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script do_work = `echo "$1"`', + "rule check_ok() {", + ' run do_work("ok")', + "}", + 
"workflow default() {", + " run do_work(ensure check_ok())", + "}", + "", + ].join("\n"), + ); + buildScripts(join(root, "m.jh"), out); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("buildScripts accepts run foo(run `echo aaa`()) — explicit nested inline script", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-run-inline-")); + const out = join(root, "out"); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script do_work = `echo "$1"`', + "workflow default() {", + " run do_work(run `echo aaa`())", + "}", + "", + ].join("\n"), + ); + buildScripts(join(root, "m.jh"), out); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("buildScripts accepts const x = run bar() followed by run foo(x)", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-capture-then-pass-")); + const out = join(root, "out"); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script bar = `echo "hello"`', + 'script foo = `echo "$1"`', + "workflow default() {", + " const x = run bar()", + " run foo(x)", + "}", + "", + ].join("\n"), + ); + buildScripts(join(root, "m.jh"), out); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("E_VALIDATE: run foo(rule_bar()) — bare rule call in args is rejected", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-nested-bare-rule-")); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script do_work = `echo "$1"`', + "rule rule_bar() {", + ' run do_work("ok")', + "}", + "workflow default() {", + " run do_work(rule_bar())", + "}", + "", + ].join("\n"), + ); + assert.throws( + () => buildScripts(join(root, "m.jh"), join(root, "out")), + /nested managed calls in argument position must be explicit/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("E_VALIDATE: run foo(`echo aaa`()) — bare inline script call in args is rejected", () => { + const root = mkdtempSync(join(tmpdir(), 
"jaiph-val-nested-bare-inline-")); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script do_work = `echo "$1"`', + "workflow default() {", + " run do_work(`echo aaa`())", + "}", + "", + ].join("\n"), + ); + assert.throws( + () => buildScripts(join(root, "m.jh"), join(root, "out")), + /nested inline script calls in argument position must be explicit/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("E_VALIDATE: const x = bar() — bare call in const assignment is rejected", () => { + const root = mkdtempSync(join(tmpdir(), "jaiph-val-const-bare-call-")); + try { + writeFileSync( + join(root, "m.jh"), + [ + 'script bar = `echo "hello"`', + "workflow default() {", + " const x = bar()", + "}", + "", + ].join("\n"), + ); + assert.throws( + () => buildScripts(join(root, "m.jh"), join(root, "out")), + /Script calls in const assignments must use run/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); diff --git a/src/transpile/validate.ts b/src/transpile/validate.ts index 5b3d18ba..c197bccf 100644 --- a/src/transpile/validate.ts +++ b/src/transpile/validate.ts @@ -302,6 +302,23 @@ function validateNestedManagedCallArgs( `nested managed calls in argument position must be explicit; use "run ${match[1]}(...)" or "ensure ${match[1]}(...)" inside the argument list`, ); } + // Detect bare inline script calls: `body`() without preceding run/ensure + const btRe = /`[^`]*`\s*\(/g; + let btMatch: RegExpExecArray | null; + while ((btMatch = btRe.exec(stripped)) !== null) { + const before = stripped.slice(0, btMatch.index).trimEnd(); + const lastToken = before.length === 0 ? 
"" : before.slice(before.lastIndexOf(" ") + 1); + if (lastToken === "run" || lastToken === "ensure") { + continue; + } + throw jaiphError( + filePath, + loc.line, + loc.col, + "E_VALIDATE", + `nested inline script calls in argument position must be explicit; use "run \`...\`(...)" inside the argument list`, + ); + } } /** Resolve a route target workflow ref to its declared parameter count. Returns undefined if unresolvable. */ From 955a67b8f3a4388197a5d44e1ded3f11c07e18e7 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 21:32:52 +0200 Subject: [PATCH 04/38] Queue: Harden docker tasks, add version/name/description for jaiph config file Signed-off-by: Jakub Dzikowski --- QUEUE.md | 214 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 125 insertions(+), 89 deletions(-) diff --git a/QUEUE.md b/QUEUE.md index 993c8f50..0b34ce01 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,152 +12,149 @@ Process rules: *** -## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR +## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR #dev-ready **Goal** -Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime, must **not** mount host `dist/` into the container, and must **not** auto-install itself into arbitrary base images. The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph. +Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime and must **not** auto-install itself into arbitrary base images. 
(Today the host uses `npm pack` + `docker build` to install the local package into a derived image; there is no bind-mount of host `dist/`, but that derived-image install path is equally forbidden.) The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph. -At the same time, publish an official Jaiph runtime image to **GHCR** and make it the default Docker image: +Publish an official Jaiph runtime image to **GHCR** and make it the default Docker image: * tagged releases → `ghcr.io/jaiphlang/jaiph-runtime:<version>` * nightly builds → `ghcr.io/jaiphlang/jaiph-runtime:nightly` -* default runtime image in Jaiph config/runtime should point at that official image +* default `runtime.docker_image` / env default should point at that official image -This is a deliberate contract change. Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired. +Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired. **Required product decision** 1. **Strict requirement** — all Docker images used by Jaiph must already have `jaiph`. 2. **Official default image** — Jaiph publishes and uses `ghcr.io/jaiphlang/jaiph-runtime`. -3. **No hidden runtime mutation** — no auto-derived image build, no host `dist/` mount hack, no `npm install -g` during Docker run startup. +3. **No hidden runtime mutation** — no auto-derived image build, no `npm install -g` of Jaiph during Docker run startup. 4. **Fast fail** — if the chosen image lacks `jaiph`, Jaiph must fail clearly with an explicit Docker/runtime error. **Why this task exists** -The current codebase has tension between two incompatible models: +The codebase currently mixes a generic contract (`jaiph run --raw` inside the container) with a convenience path (stock images without `jaiph`). Both cannot be true without bootstrapping. This task chooses the strict model and removes the second. 
-* generic Docker contract: run `jaiph run --raw` inside the container -* convenience contract: allow stock images that do not contain `jaiph` +**Critical implementation detail (from current `src/runtime/docker.ts`)** -Both cannot be true without runtime bootstrapping. This task intentionally chooses the first model and removes the second. +When `imageExplicit === false`, `resolveImage` currently ends in `ensureLocalRuntimeImage`, which **always** targets a derived `jaiph-runtime-auto:*` tag built via `npm pack`, even if the base image already contains `jaiph`. After switching the default to the official GHCR image (or any image that already has `jaiph`), the runtime must **use that image as-is** when `command -v jaiph` succeeds — no auto-derivation. If `jaiph` is missing, fail fast (no fallback build). + +**Resolved defaults (no longer open)** + +* **Default tag rule**: Release npm builds embed `ghcr.io/jaiphlang/jaiph-runtime:<version>` matching the package/`jaiph` version. Main/nightly CI artifacts and docs for contributors use the `:nightly` tag; state the rule explicitly in docs. +* **Cursor / Claude CLIs in the official image**: **Exclude by default** from the minimal `jaiph-runtime` image to keep size and supply chain small; document how to extend a custom image (the managed `.jaiph/Dockerfile` template may remain a fuller example). + +**Queue coordination** + +Ship published GHCR images before or together with the later queued task “Runtime — default Docker when not CI or unsafe”, which will expect a pullable default image for local users. 
**Context** -* Docker runtime implementation: `src/runtime/docker.ts` -* Docker run path / spawn site: `src/cli/commands/run.ts` -* Docker docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md` -* Current Docker E2E coverage: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh` -* Managed project Dockerfile template: `.jaiph/Dockerfile`, plus `jaiph init` scaffolding in `src/cli/commands/init.ts` -* CI/release workflows: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml` +* Docker runtime: `src/runtime/docker.ts` +* Docker run path: `src/cli/commands/run.ts` +* Docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md` +* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh` +* Managed Dockerfile: `.jaiph/Dockerfile`, `src/cli/commands/init.ts` +* CI: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml` **Implementation requirements** -1. **Runtime** - * Remove Docker fallback logic that auto-builds a derived image or auto-installs Jaiph into arbitrary base images. - * Keep the container entry generic: `jaiph run --raw ...` - * Add an explicit preflight/validation step for Docker images: - * either the selected image is the official `ghcr.io/jaiphlang/jaiph-runtime:*`, - * or a custom image that already contains `jaiph`. - * If `jaiph` is missing in the chosen image, fail with a clear error message that tells the user to: - * use the official GHCR image, or - * install Jaiph in their custom image. - -2. **Default image** - * Change the default Docker image away from `node:20-bookworm`. - * Default must become the official GHCR runtime image. - * Decide whether the default tag should be version-pinned at release time and `nightly` on main/nightly builds; document the exact rule. - -3. 
**Publishing** - * Add CI/release automation to build and publish `ghcr.io/jaiphlang/jaiph-runtime`. - * Publish at least: - * per-tag release images - * `nightly` - * Ensure the published image contains: - * `jaiph` - * Node.js - * `fuse-overlayfs` / Docker runtime prerequisites - * non-root runtime user if that remains part of the sandbox contract - * Decide whether Cursor / Claude CLIs belong in the official runtime image by default; document the decision explicitly. - -4. **Docs** - * Rewrite Docker docs to state the strict image contract clearly. - * Document the official GHCR image as the default and recommended path. - * Document how custom images must install `jaiph`. - * Remove any wording that implies Jaiph will make arbitrary base images work automatically. - -5. **Tests** - * Update E2E/tests so they assert the strict contract, not the bootstrap fallback. - * In particular, tests that currently expect `node:20-bookworm` to work without Jaiph must be rewritten. - * Add/keep a regression test that proves Docker fails clearly when the selected image lacks `jaiph`. +1. **Runtime** — Remove `ensureLocalRuntimeImage` / `buildRuntimeImageFromLocalPackage` / auto-derivation paths. Keep container entry `jaiph run --raw ...`. Preflight: after pull, verify `jaiph` exists in the selected image; if not, error with guidance to use `ghcr.io/jaiphlang/jaiph-runtime` or install Jaiph in a custom image. Preflight is by capability check, not by image name whitelist. +2. **Default image** — Default becomes the official GHCR runtime image (not `node:20-bookworm`). +3. **Publishing** — CI/release builds and pushes `ghcr.io/jaiphlang/jaiph-runtime` for release tags and `nightly`. Image includes Node.js, `jaiph`, `fuse-overlayfs` (and other sandbox prereqs per `.jaiph/Dockerfile`), and non-root user if that remains the contract. +4. **Docs** — Rewrite Docker sections for the strict contract; remove language about auto-derived images and stock bases “just working.” +5. 
**Tests** — Update E2E for strict contract; add/keep regression that an image without `jaiph` fails with a clear error. + +**Scope note** + +Expect changes across more than three files (runtime, CI workflows, init scaffolding, docs, E2E, unit tests). Prefer plain functions and small helpers; `docker.ts` is already large—avoid speculative abstractions. **Acceptance criteria** * Default Docker image is `ghcr.io/jaiphlang/jaiph-runtime:*`, not `node:20-bookworm`. * Jaiph never auto-builds a derived runtime image at Docker run time. -* Jaiph never mounts host build output into the container to provide `jaiph`. +* Jaiph never injects Jaiph into the container except by using an image that already contains it (no `npm pack` bootstrap). * A custom image without `jaiph` fails fast with a clear actionable error. * Official GHCR runtime images are published for release tags and `nightly`. * Docs describe the strict contract and official image flow without ambiguity. -* Unit + E2E coverage prevents regression back to runtime bootstrap behavior. +* Unit + E2E coverage prevents regression to bootstrap behavior. -*** +## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready -## Support optional config properties in Jaiph DSL: version, name, description. +**Goal** -## Runtime — credential proxy for Docker mode +Add optional module-scoped manifest fields in the module-level `config { }` block so a `.jh` file can declare human-readable **name**, **version**, and **description** without changing agent/run/runtime execution. -**Goal** -Containers should never hold real API keys. Implement a host-side HTTP proxy (the "Phantom Token" pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key before forwarding upstream. The agent inside the container literally cannot leak the real key — it never has it. 
+**Keys (dot-separated, string values)** -**Design** +- `module.name` +- `module.version` +- `module.description` -1. **Host-side proxy** — a lightweight `http.createServer` bound to `127.0.0.1:` (macOS/WSL2) or the `docker0` bridge IP (Linux). Receives requests from the container, swaps `x-api-key: placeholder` with the real key from host env, forwards to the upstream API, pipes the response back (including streaming SSE). -2. **Container env injection** — instead of passing `ANTHROPIC_API_KEY=$real_key` into `docker run`, pass `ANTHROPIC_API_KEY=placeholder` + `ANTHROPIC_BASE_URL=http://host.docker.internal:`. -3. **Multi-backend routing** — Jaiph supports Claude and Cursor backends. Each backend's CLI must respect a base URL override env var. `claude` CLI supports `ANTHROPIC_BASE_URL`; `cursor-agent` may not — needs investigation. -4. **Lifecycle** — proxy starts before the first Docker container launch, shuts down after the last container exits or on Jaiph process exit. +All optional; omitted keys leave the corresponding field unset. -**Context** +**Semantics** + +- Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it. +- **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`. +- Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata). + +**Implementation touchpoints** -* Pattern reference: [NanoClaw's credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/) — same approach, independently arrived at. 
-* Current Docker execution path: `src/runtime/kernel/` — Docker run/exec logic, env var forwarding. -* Dockerfile: `.jaiph/Dockerfile` — container image setup. -* Backend CLI invocation: `src/runtime/kernel/node-workflow-runtime.ts` — where `claude` / `cursor-agent` commands are constructed with env vars. +- `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`. +- `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`. +- `src/format/emit.ts` — formatter round-trip for the new keys. +- `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`). +- Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations. +- Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`). -**Open questions** +**Non-goals** -* Does `cursor-agent` support a base URL override? If not, the proxy pattern may require a wrapper script or LD\_PRELOAD-based interception inside the container. -* Single port with path-based routing vs one port per backend? -* Should the proxy also enforce rate limits or audit-log API calls? +- Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation. + +**Queue coordination** + +- No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings. **Acceptance criteria** -* Host-side proxy starts automatically when Docker mode is active. -* Containers receive only placeholder credentials — no real API keys in container env. -* `claude` CLI calls from inside Docker succeed via the proxy. -* Proxy handles streaming responses (SSE) correctly. -* Real keys never appear in container logs, env dumps, or process listings. -* Platform-specific host address resolution works (macOS, Linux). 
+- Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`. +- Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules). +- Unit tests cover happy path and workflow rejection; docs and grammar list the keys. -*** +**Scope note** + +- Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers. -## Runtime — harden Docker execution environment +## Runtime — harden Docker execution environment #dev-ready **Goal** -Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, image supply chain, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage. +Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage. (Image provenance and the official default image belong to the queued **Docker — strict image contract + GHCR** task; this task only documents or tightens runtime-visible pull/verify behavior as needed, without redefining publishing or the default image.) **Context** -* Docker runtime: `src/runtime/kernel/` — look for `docker.ts` or Docker-related logic in the run path. -* E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`. -* Config: `runtime.docker_enabled`, `runtime.docker_timeout`, `runtime.workspace` keys in `src/config.ts` and metadata parsing. 
+* Docker runtime: `src/runtime/docker.ts` (`parseMounts` / `validateMounts`, `resolveDockerConfig`, `buildDockerArgs`, `checkDockerAvailable`, `spawnDockerProcess`); CLI integration: `src/cli/commands/run.ts`. +* Current forwarding: `buildDockerArgs` remaps `JAIPH_WORKSPACE` and `JAIPH_RUNS_DIR`, passes through `JAIPH_*` except `JAIPH_DOCKER_*`, and passes keys prefixed `CURSOR_`, `ANTHROPIC_`, or `CLAUDE_` (see `AGENT_ENV_PREFIXES` in `docker.ts`). Mounts come from resolved `runtime.workspace` plus fixed rw run-dir, ro overlay script, and `--device /dev/fuse`. +* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`. +* Config: `runtime.docker_enabled`, `runtime.docker_image`, `runtime.docker_network`, `runtime.docker_timeout`, `runtime.workspace` via `src/config.ts` and metadata parsing. + +**Queue coordination** + +* Land after or together with **Docker — strict image contract + publish official `jaiph-runtime` images to GHCR** so bootstrap removal and default image changes are settled before deep hardening refactors the same code paths. +* Land after or together with **Runtime — credential proxy for Docker mode** so any env allowlist/denylist and `docs/sandboxing.md` text stay consistent with placeholder `ANTHROPIC_*` and host-reachable API base URLs (no real secrets in `-e`). +* The later task **Runtime — default Docker when not CI or unsafe** changes `runtime.docker_enabled` defaults; avoid conflicting precedence — document how hardened Docker behavior interacts with that default once both exist. **Acceptance criteria** -* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / isn't protecting against. -* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist, safer defaults) with unit tests. 
+* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / is not protecting against (including that hooks run on the host). +* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist aligned with the credential-proxy contract, safer defaults) with unit tests. * No silent widen of host access without opt-in. +* Document network mode behavior (`runtime.docker_network` / `--network`) and failure modes for missing Docker or failed pulls (`E_DOCKER_*`), extending existing patterns where appropriate. -*** +**Scope note** + +* `docker.ts` is already large (~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle. ## Runtime — default Docker when not CI or unsafe #dev-ready @@ -180,6 +177,45 @@ Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker def *** +## Runtime — credential proxy for Docker mode + +**Goal** +Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. The workload in the container never receives the real secret. + +**Design** + +1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:<port>`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration, stream the response back (including SSE). +2. 
**Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=<placeholder>` and `ANTHROPIC_BASE_URL=http://host.docker.internal:<port>` (or `http://<bridge-ip>:<port>`). Never pass the real key in `-e`. +3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container. +4. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there. +5. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred. +6. **Non-goals (v1)** — Rate limits and audit logging. +7. **Lifecycle** — Start the proxy before the first `spawnDockerProcess` for that Jaiph process; stop it when tearing down the Docker run (and on Jaiph exit), with reference counting if multiple Docker runs can occur in one process. + +**Context** + +* Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/). +* **Implementation touchpoints** — `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags), `src/cli/commands/run.ts` (spawn/cleanup lifecycle). Agent CLI args/env preparation: `src/runtime/kernel/prompt.ts` (likely unchanged). +* Image template: `.jaiph/Dockerfile`. + +**Queue coordination** + +* This edits the same `docker.ts` / Docker spawn path as the queued **Docker — strict image contract + GHCR** task—land together or immediately after to reduce merge churn. 
+* Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands. + +**Acceptance criteria** + +* Host-side proxy starts automatically when Docker mode is active (Anthropic/Claude path). +* Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment. +* `claude` CLI calls from inside Docker succeed via the proxy. +* Proxy handles streaming responses (SSE) correctly. +* Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder). +* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). + +**Scope note** + +* Target **~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers. + ## `jaiph serve` — expose workflows as an MCP server #dev-ready **Goal** From d398b7a51f6dbb14ad86f1f82203135d995b2828 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 21:51:57 +0200 Subject: [PATCH 05/38] Feat: Enforce strict Docker image contract and publish official GHCR runtime images Remove all auto-derivation and runtime bootstrap paths from Docker mode. The runtime no longer builds derived images via npm pack or installs jaiph into arbitrary base images at run time. Every Docker image must already contain a working jaiph CLI; missing jaiph now fails fast with an actionable error. Default docker_image switches from node:20-bookworm to the official ghcr.io/jaiphlang/jaiph-runtime image. A new CI workflow publishes that image for release tags and nightly builds. Docs, init scaffolding, and E2E tests are updated to reflect the strict contract. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/docker-publish.yml | 72 +++++++++ CHANGELOG.md | 1 + QUEUE.md | 110 +++---------- docker/Dockerfile.runtime | 42 +++++ docs/architecture.md | 2 +- docs/cli.md | 4 +- docs/configuration.md | 2 +- docs/sandboxing.md | 40 ++++- e2e/lib/common.sh | 53 ++++++ e2e/tests/00_install_and_init.sh | 35 +--- e2e/tests/72_docker_run_artifacts.sh | 39 ++++- e2e/tests/73_docker_dockerfile_detection.sh | 43 +++-- e2e/tests/74_docker_lifecycle.sh | 11 +- src/cli/commands/init.test.ts | 9 +- src/cli/commands/init.ts | 33 +--- src/runtime/docker.test.ts | 34 +++- src/runtime/docker.ts | 168 +++++--------------- 17 files changed, 392 insertions(+), 306 deletions(-) create mode 100644 .github/workflows/docker-publish.yml create mode 100644 docker/Dockerfile.runtime diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 00000000..29b1c82f --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,72 @@ +name: Publish Docker runtime image + +on: + push: + branches: [nightly] + tags: ["v*"] + +permissions: + contents: read + packages: write + +env: + REGISTRY: ghcr.io + IMAGE_NAME: jaiphlang/jaiph-runtime + +jobs: + publish: + name: Build and push jaiph-runtime + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: npm + + - name: Install dependencies and build + run: | + npm ci + npm run build + + - name: Create npm tarball + run: npm pack --pack-destination docker/ + + - name: Rename tarball for Dockerfile + run: | + cd docker + mv jaiph-*.tgz jaiph.tgz + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Determine image tags + id: tags + run: | + if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then + 
VERSION="${GITHUB_REF_NAME#v}" + echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${VERSION},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> "$GITHUB_OUTPUT" + else + echo "tags=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:nightly" >> "$GITHUB_OUTPUT" + fi + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: docker + file: docker/Dockerfile.runtime + push: true + tags: ${{ steps.tags.outputs.tags }} + build-args: JAIPH_TARBALL=jaiph.tgz + + - name: Verify pushed image contains jaiph + run: | + TAG="$(echo '${{ steps.tags.outputs.tags }}' | cut -d',' -f1)" + docker run --rm --entrypoint sh "${TAG}" -lc "command -v jaiph && jaiph --version" diff --git a/CHANGELOG.md b/CHANGELOG.md index cd5409d9..dd64875c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. 
The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`). - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. 
The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`). # 0.9.2 diff --git a/QUEUE.md b/QUEUE.md index 0b34ce01..e6ffada4 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,74 +12,6 @@ Process rules: *** -## Docker — strict image contract + publish official `jaiph-runtime` images to GHCR #dev-ready - -**Goal** -Remove all Docker runtime bootstrapping/fallback magic. In Docker mode, **every selected image must already contain a working `jaiph` CLI**. Jaiph must **not** build a thin derived image at runtime and must **not** auto-install itself into arbitrary base images. (Today the host uses `npm pack` + `docker build` to install the local package into a derived image; there is no bind-mount of host `dist/`, but that derived-image install path is equally forbidden.) The product contract becomes explicit: if Docker is on, the image is responsible for containing Jaiph. - -Publish an official Jaiph runtime image to **GHCR** and make it the default Docker image: - -* tagged releases → `ghcr.io/jaiphlang/jaiph-runtime:` -* nightly builds → `ghcr.io/jaiphlang/jaiph-runtime:nightly` -* default `runtime.docker_image` / env default should point at that official image - -Convenience fallback to `node:20-bookworm` + runtime bootstrap is **not** desired. - -**Required product decision** - -1. **Strict requirement** — all Docker images used by Jaiph must already have `jaiph`. -2. 
**Official default image** — Jaiph publishes and uses `ghcr.io/jaiphlang/jaiph-runtime`. -3. **No hidden runtime mutation** — no auto-derived image build, no `npm install -g` of Jaiph during Docker run startup. -4. **Fast fail** — if the chosen image lacks `jaiph`, Jaiph must fail clearly with an explicit Docker/runtime error. - -**Why this task exists** - -The codebase currently mixes a generic contract (`jaiph run --raw` inside the container) with a convenience path (stock images without `jaiph`). Both cannot be true without bootstrapping. This task chooses the strict model and removes the second. - -**Critical implementation detail (from current `src/runtime/docker.ts`)** - -When `imageExplicit === false`, `resolveImage` currently ends in `ensureLocalRuntimeImage`, which **always** targets a derived `jaiph-runtime-auto:*` tag built via `npm pack`, even if the base image already contains `jaiph`. After switching the default to the official GHCR image (or any image that already has `jaiph`), the runtime must **use that image as-is** when `command -v jaiph` succeeds — no auto-derivation. If `jaiph` is missing, fail fast (no fallback build). - -**Resolved defaults (no longer open)** - -* **Default tag rule**: Release npm builds embed `ghcr.io/jaiphlang/jaiph-runtime:` matching the package/`jaiph` version. Main/nightly CI artifacts and docs for contributors use the `:nightly` tag; state the rule explicitly in docs. -* **Cursor / Claude CLIs in the official image**: **Exclude by default** from the minimal `jaiph-runtime` image to keep size and supply chain small; document how to extend a custom image (the managed `.jaiph/Dockerfile` template may remain a fuller example). - -**Queue coordination** - -Ship published GHCR images before or together with the later queued task “Runtime — default Docker when not CI or unsafe”, which will expect a pullable default image for local users. 
- -**Context** - -* Docker runtime: `src/runtime/docker.ts` -* Docker run path: `src/cli/commands/run.ts` -* Docs: `docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md` -* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`, `e2e/tests/74_docker_lifecycle.sh` -* Managed Dockerfile: `.jaiph/Dockerfile`, `src/cli/commands/init.ts` -* CI: `.github/workflows/ci.yml`, `.github/workflows/release.yml`, `.github/workflows/nightly-engineer.yml` - -**Implementation requirements** - -1. **Runtime** — Remove `ensureLocalRuntimeImage` / `buildRuntimeImageFromLocalPackage` / auto-derivation paths. Keep container entry `jaiph run --raw ...`. Preflight: after pull, verify `jaiph` exists in the selected image; if not, error with guidance to use `ghcr.io/jaiphlang/jaiph-runtime` or install Jaiph in a custom image. Preflight is by capability check, not by image name whitelist. -2. **Default image** — Default becomes the official GHCR runtime image (not `node:20-bookworm`). -3. **Publishing** — CI/release builds and pushes `ghcr.io/jaiphlang/jaiph-runtime` for release tags and `nightly`. Image includes Node.js, `jaiph`, `fuse-overlayfs` (and other sandbox prereqs per `.jaiph/Dockerfile`), and non-root user if that remains the contract. -4. **Docs** — Rewrite Docker sections for the strict contract; remove language about auto-derived images and stock bases “just working.” -5. **Tests** — Update E2E for strict contract; add/keep regression that an image without `jaiph` fails with a clear error. - -**Scope note** - -Expect changes across more than three files (runtime, CI workflows, init scaffolding, docs, E2E, unit tests). Prefer plain functions and small helpers; `docker.ts` is already large—avoid speculative abstractions. - -**Acceptance criteria** - -* Default Docker image is `ghcr.io/jaiphlang/jaiph-runtime:*`, not `node:20-bookworm`. -* Jaiph never auto-builds a derived runtime image at Docker run time. 
-* Jaiph never injects Jaiph into the container except by using an image that already contains it (no `npm pack` bootstrap). -* A custom image without `jaiph` fails fast with a clear actionable error. -* Official GHCR runtime images are published for release tags and `nightly`. -* Docs describe the strict contract and official image flow without ambiguity. -* Unit + E2E coverage prevents regression to bootstrap behavior. - ## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready **Goal** @@ -88,44 +20,44 @@ Add optional module-scoped manifest fields in the module-level `config { }` bloc **Keys (dot-separated, string values)** -- `module.name` -- `module.version` -- `module.description` +* `module.name` +* `module.version` +* `module.description` All optional; omitted keys leave the corresponding field unset. **Semantics** -- Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it. -- **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`. -- Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata). +* Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it. +* **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`. +* Stored on `WorkflowMetadata` as descriptive metadata only. 
They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata). **Implementation touchpoints** -- `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`. -- `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`. -- `src/format/emit.ts` — formatter round-trip for the new keys. -- `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`). -- Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations. -- Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`). +* `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`. +* `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`. +* `src/format/emit.ts` — formatter round-trip for the new keys. +* `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`). +* Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations. +* Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`). **Non-goals** -- Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation. +* Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation. **Queue coordination** -- No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings. +* No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings. **Acceptance criteria** -- Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`. 
-- Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules). -- Unit tests cover happy path and workflow rejection; docs and grammar list the keys. +* Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`. +* Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules). +* Unit tests cover happy path and workflow rejection; docs and grammar list the keys. **Scope note** -- Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers. +* Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers. ## Runtime — harden Docker execution environment #dev-ready @@ -154,7 +86,7 @@ Docker mode is the isolation boundary for workflow runs. Harden it: least-privil **Scope note** -* `docker.ts` is already large (~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle. +* `docker.ts` is already large (\~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle. ## Runtime — default Docker when not CI or unsafe #dev-ready @@ -177,6 +109,8 @@ Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker def *** +## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?) 
+ ## Runtime — credential proxy for Docker mode **Goal** @@ -214,7 +148,7 @@ Containers should never hold real API keys. Implement a host-side HTTP proxy (th **Scope note** -* Target **~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers. +* Target **\~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers. ## `jaiph serve` — expose workflows as an MCP server #dev-ready diff --git a/docker/Dockerfile.runtime b/docker/Dockerfile.runtime new file mode 100644 index 00000000..0e3baad6 --- /dev/null +++ b/docker/Dockerfile.runtime @@ -0,0 +1,42 @@ +# Official Jaiph runtime image — ghcr.io/jaiphlang/jaiph-runtime +# +# This is the minimal image used by `jaiph run --docker` when no custom image +# is configured. It contains Node.js, jaiph, and fuse-overlayfs for the +# copy-on-write workspace overlay. +# +# Agent CLIs (Claude Code, cursor-agent) are excluded to keep the image small. +# To add them, extend this image or use a custom .jaiph/Dockerfile. +# +# Tags: +# : — matches the jaiph npm package version (built on release) +# :nightly — built from the nightly branch on each CI run + +FROM node:20-bookworm-slim + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + bash \ + curl \ + git \ + ca-certificates \ + fuse-overlayfs \ + fuse3 \ + rsync && \ + rm -rf /var/lib/apt/lists/* + +# Non-root user for sandbox safety. +RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \ + mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \ + chown -R jaiph:jaiph /jaiph + +# Install jaiph from the local tarball (provided at build time via --build-arg). +# The tarball is produced by `npm pack` in CI before the docker build step. 
+ARG JAIPH_TARBALL=jaiph.tgz +COPY ${JAIPH_TARBALL} /tmp/jaiph.tgz +RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz + +USER jaiph +ENV HOME=/home/jaiph +ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + +WORKDIR /jaiph/workspace diff --git a/docs/architecture.md b/docs/architecture.md index 787b33cb..936e3022 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -56,7 +56,7 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`* - `jaiph format` rewrites `.jh` / `.test.jh` files into canonical style. Pure AST→text emitter; no side-effects beyond file writes. - **Docker runtime helper (`src/runtime/docker.ts`)** - - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits. + - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The default image is the official `ghcr.io/jaiphlang/jaiph-runtime` GHCR image; every selected image must already contain `jaiph` (no auto-install or derived-image build at runtime). The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits. ## Runtime vs CLI responsibilities diff --git a/docs/cli.md b/docs/cli.md index 04fd28a6..a124946d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -304,7 +304,7 @@ Creates: - `.jaiph/.gitignore` — lists `runs` and `tmp`. 
If the file already exists and does not match this exact list, `jaiph init` exits with a non-zero status. - `.jaiph/bootstrap.jh` — canonical bootstrap workflow; made executable. The template uses a triple-quoted multiline prompt body (`prompt """ ... """`) so the generated file parses and compiles as valid Jaiph. It also asks the agent to review/update `.jaiph/Dockerfile` for this repository and ends by logging a summary (`WHAT CHANGED` + `WHY`). -- `.jaiph/Dockerfile` — canonical Docker sandbox template generated by init. It uses `ubuntu:latest`, installs standard utilities, Node.js LTS, Claude Code CLI, cursor-agent, then installs Jaiph via `curl -fsSL https://jaiph.org/install | bash`. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note. +- `.jaiph/Dockerfile` — Docker sandbox template that extends the official `ghcr.io/jaiphlang/jaiph-runtime:nightly` image with agent CLIs (Claude Code, cursor-agent). The base image already contains Node.js, jaiph, and `fuse-overlayfs`, so the generated Dockerfile only adds project-specific tooling. If the file is missing, init creates it. If it already exists and includes the init marker comment, init updates it to the latest template. Otherwise (custom user-managed Dockerfile), init leaves it unchanged and prints a note. - `.jaiph/SKILL.md` — copied from the skill file bundled with your Jaiph installation (or from `JAIPH_SKILL_PATH` when set). If no skill file is found, this file is not written and a note is printed. ## `jaiph install` @@ -421,7 +421,7 @@ These variables apply to `jaiph run` and workflow execution. Variables marked ** **Docker sandbox:** - `JAIPH_DOCKER_ENABLED` — set to `true` to enable Docker sandbox (overrides in-file `runtime.docker_enabled`). 
-- `JAIPH_DOCKER_IMAGE` — Docker image for sandbox (overrides in-file `runtime.docker_image`). +- `JAIPH_DOCKER_IMAGE` — Docker image for sandbox (overrides in-file `runtime.docker_image`). The image must already contain `jaiph`; if it does not, the run fails with `E_DOCKER_NO_JAIPH`. Defaults to the official GHCR runtime image (`ghcr.io/jaiphlang/jaiph-runtime:<version>`). - `JAIPH_DOCKER_NETWORK` — Docker network mode (overrides in-file `runtime.docker_network`). - `JAIPH_DOCKER_TIMEOUT` — execution timeout in seconds (overrides in-file `runtime.docker_timeout`). diff --git a/docs/configuration.md b/docs/configuration.md index 7fa5b25e..ee6d7c1a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -146,7 +146,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r | Key | Type | Default | Env variable | Description | |-----|------|---------|--------------|-------------| | `runtime.docker_enabled` | boolean | `false` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. | -| `runtime.docker_image` | string | `node:20-bookworm` | `JAIPH_DOCKER_IMAGE` | Image name. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the default. | +| `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:<version>` | `JAIPH_DOCKER_IMAGE` | Image name. Must already contain `jaiph`. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the official GHCR image matching the installed jaiph version. | | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. | | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. | | `runtime.workspace` | string[] | `[".:/jaiph/workspace:rw"]` | _(no env override)_ | Mount list. Only settable via in-file config or defaults.
| diff --git a/docs/sandboxing.md b/docs/sandboxing.md index 6d1be92d..b43d5080 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -50,7 +50,7 @@ All Docker-related keys live under `runtime.*` in module-level config: | Key | Type | Default | Description | |-----|------|---------|-------------| | `runtime.docker_enabled` | boolean | `false` | Enable Docker sandbox for the run. | -| `runtime.docker_image` | string | `"node:20-bookworm"` | Base container image. If it lacks `jaiph`, Jaiph builds a thin derived image and installs the current package into it. | +| `runtime.docker_image` | string | `"ghcr.io/jaiphlang/jaiph-runtime:"` | Container image. Must already contain `jaiph`. Defaults to the official GHCR runtime image matching the installed jaiph version. | | `runtime.docker_network` | string | `"default"` | Docker network mode. | | `runtime.docker_timeout` | integer | `300` | Max execution time in seconds. `0` disables the timeout. | | `runtime.workspace` | string array | `[".:/jaiph/workspace:rw"]` | Mount specifications (see below). | @@ -92,7 +92,7 @@ Host paths are resolved relative to the workspace root. Each mount is duplicated overlay-run.sh # runtime-generated entrypoint mounted ro from host temp file ``` -The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run.sh` (a ~10 line bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`. The container runs `/jaiph/overlay-run.sh jaiph run --raw `. The overlay wrapper sets up fuse-overlayfs, then execs the jaiph command. When the selected image does not already contain `jaiph`, the host first builds a thin derived image from that base and installs the current Jaiph package into it, so the runtime path stays generic. No `COPY` in the project Dockerfile is needed -- `overlay-run.sh` is a jaiph runtime artifact. +The working directory is `/jaiph/workspace`. 
The host CLI generates `overlay-run.sh` (a ~10 line bash script) to a temp file and mounts it read-only at `/jaiph/overlay-run.sh`. The container runs `/jaiph/overlay-run.sh jaiph run --raw `. The overlay wrapper sets up fuse-overlayfs, then execs the jaiph command. The image must already contain `jaiph` — Jaiph does not install itself into the container at runtime. No `COPY` in the project Dockerfile is needed for jaiph runtime files — `overlay-run.sh` is a jaiph runtime artifact. ### Runtime behavior @@ -112,6 +112,22 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run **Image pull** -- If the image is not present locally, `docker pull` runs automatically. Pull failure produces `E_DOCKER_PULL`. +### Image contract + +**Every Docker image used by Jaiph must already contain a working `jaiph` CLI.** Jaiph does not auto-install itself into containers at runtime — no derived image builds, no `npm pack` bootstrap. If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and guidance to use the official image or install jaiph in a custom image. + +### Official runtime image + +Jaiph publishes official runtime images to GHCR: + +| Tag | Built from | Use case | +|-----|-----------|----------| +| `ghcr.io/jaiphlang/jaiph-runtime:<version>` | Release tags (`v*`) | Production / pinned versions | +| `ghcr.io/jaiphlang/jaiph-runtime:nightly` | `nightly` branch | Contributors and CI | +| `ghcr.io/jaiphlang/jaiph-runtime:latest` | Latest release tag | Convenience alias | + +The default `runtime.docker_image` is `ghcr.io/jaiphlang/jaiph-runtime:<version>` where `<version>` matches the installed jaiph package version. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001). It does **not** include agent CLIs (Claude Code, cursor-agent) to keep the image small. To add agent CLIs, extend the official image or use a custom `.jaiph/Dockerfile` (see below).
+ ### Dockerfile-based image detection The runtime considers the image explicitly configured when either `runtime.docker_image` appears in the file or `JAIPH_DOCKER_IMAGE` is set in the environment. In that case, `.jaiph/Dockerfile` is not consulted. @@ -119,11 +135,25 @@ The runtime considers the image explicitly configured when either `runtime.docke When the image is not explicit: 1. If `.jaiph/Dockerfile` exists in the workspace root, the runtime builds it, tags the result `jaiph-runtime:latest`, and uses that image. Build failure produces `E_DOCKER_BUILD`. -2. Otherwise, the default image (`node:20-bookworm`) is pulled if needed. +2. Otherwise, the default image (`ghcr.io/jaiphlang/jaiph-runtime:<version>`) is pulled if needed. -If the selected base image does not already contain `jaiph`, Jaiph builds a thin derived runtime image from it and installs the current local package with `npm install -g`, then runs the workflow in that derived image. +After resolving the image (whether from a Dockerfile build, an explicit image, or the default), Jaiph verifies that `jaiph` is available inside the container. If the check fails, the run exits with `E_DOCKER_NO_JAIPH`. -The repository's example `.jaiph/Dockerfile` includes `ubuntu:latest` as a base, Node.js LTS from NodeSource, `fuse-overlayfs`, Claude Code CLI, cursor-agent, and jaiph (installed via the official installer). The image creates a non-root `jaiph` user (UID 10001) and sets `USER jaiph`. Including `fuse-overlayfs` and `jaiph` in the image is still the best path for full sandbox parity and faster startup, but Jaiph can also auto-build a thin derived runtime image when the base image lacks `jaiph`. The Dockerfile does not need to copy any jaiph runtime files -- `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime. +The `jaiph init` scaffold generates a `.jaiph/Dockerfile` that extends the official runtime image with agent CLIs (Claude Code, cursor-agent).
The Dockerfile does not need to copy any jaiph runtime files — `overlay-run.sh` is generated by the host CLI and mounted into the container at runtime. + +### Extending the official image + +To add project-specific tools or agent CLIs to the official image, create a `.jaiph/Dockerfile`: + +```dockerfile +FROM ghcr.io/jaiphlang/jaiph-runtime:nightly + +USER root +RUN npm install -g @anthropic-ai/claude-code +USER jaiph + +# Add project-specific package managers/build tools below. +``` ### Environment variable forwarding diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh index 14febd71..d8c157b3 100644 --- a/e2e/lib/common.sh +++ b/e2e/lib/common.sh @@ -463,6 +463,59 @@ EOF JAIPH_BIN_DIR="${JAIPH_E2E_BIN_DIR}" curl -fsSL "${E2E_SERVER_URL}/install" | bash } +E2E_DOCKER_TEST_IMAGE="${JAIPH_E2E_DOCKER_IMAGE:-}" +E2E_DOCKER_IMAGE_BUILT=0 + +# Build a local jaiph-e2e-runtime image from the current source tree. +# Caches the image name in E2E_DOCKER_TEST_IMAGE so it is built at most once. +e2e::ensure_docker_test_image() { + if [[ -n "${E2E_DOCKER_TEST_IMAGE}" ]]; then + return 0 + fi + if ! command -v docker >/dev/null 2>&1 || ! 
docker info >/dev/null 2>&1; then + return 1 + fi + local tag="jaiph-e2e-runtime:local" + if [[ "${E2E_DOCKER_IMAGE_BUILT}" == "1" ]]; then + E2E_DOCKER_TEST_IMAGE="${tag}" + export JAIPH_E2E_DOCKER_IMAGE="${tag}" + return 0 + fi + local context_dir + context_dir="$(mktemp -d)" + (cd "${E2E_REPO_ROOT}" && npm pack --pack-destination "${context_dir}" >/dev/null 2>&1) + local tarball + tarball="$(ls "${context_dir}"/jaiph-*.tgz 2>/dev/null | head -1)" + if [[ -z "${tarball}" ]]; then + rm -rf "${context_dir}" + return 1 + fi + mv "${tarball}" "${context_dir}/jaiph.tgz" + if [[ -f "${E2E_REPO_ROOT}/docker/Dockerfile.runtime" ]]; then + cp "${E2E_REPO_ROOT}/docker/Dockerfile.runtime" "${context_dir}/Dockerfile" + else + cat > "${context_dir}/Dockerfile" <<'EODOCKERFILE' +FROM node:20-bookworm-slim +RUN apt-get update && apt-get install -y --no-install-recommends bash curl git ca-certificates fuse-overlayfs fuse3 rsync && rm -rf /var/lib/apt/lists/* +RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && chown -R jaiph:jaiph /jaiph +ARG JAIPH_TARBALL=jaiph.tgz +COPY ${JAIPH_TARBALL} /tmp/jaiph.tgz +RUN npm install -g /tmp/jaiph.tgz && rm -f /tmp/jaiph.tgz +USER jaiph +ENV HOME=/home/jaiph +ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" +WORKDIR /jaiph/workspace +EODOCKERFILE + fi + if docker build -t "${tag}" --build-arg JAIPH_TARBALL=jaiph.tgz "${context_dir}" >/dev/null 2>&1; then + E2E_DOCKER_IMAGE_BUILT=1 + E2E_DOCKER_TEST_IMAGE="${tag}" + export JAIPH_E2E_DOCKER_IMAGE="${tag}" + fi + rm -rf "${context_dir}" + [[ -n "${E2E_DOCKER_TEST_IMAGE}" ]] +} + e2e::prepare_test_env() { local test_name="$1" e2e::prepare_shared_context diff --git a/e2e/tests/00_install_and_init.sh b/e2e/tests/00_install_and_init.sh index a293b47a..7e5dc51c 100644 --- a/e2e/tests/00_install_and_init.sh +++ b/e2e/tests/00_install_and_init.sh @@ -74,40 +74,20 @@ e2e::pass "generated bootstrap 
workflow compiles" e2e::assert_file_exists "${TEST_DIR}/.jaiph/Dockerfile" ".jaiph/Dockerfile exists" expected_dockerfile="$(mktemp)" cat > "${expected_dockerfile}" <<'EOF' -FROM ubuntu:latest +# Extends the official jaiph runtime image with agent CLIs for prompt steps. +# The base image already contains Node.js, jaiph, and fuse-overlayfs. +# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly. +FROM ghcr.io/jaiphlang/jaiph-runtime:nightly # Generated by jaiph init for project sandboxing. # Keep this file aligned with your repository's runtime/build/test needs. -# Standard utilities + fuse-overlayfs for CoW sandbox -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - bash \ - curl \ - git \ - ca-certificates \ - gnupg \ - fuse-overlayfs \ - fuse3 \ - rsync && \ - rm -rf /var/lib/apt/lists/* - -# Node.js latest LTS (required by jaiph prompt stream helpers) -RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \ - apt-get install -y --no-install-recommends nodejs && \ - rm -rf /var/lib/apt/lists/* - -# Non-root user keeps agent CLIs happy in Docker mode. -RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \ - mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \ - chown -R jaiph:jaiph /jaiph +USER root # Claude Code CLI (Anthropic) RUN npm install -g @anthropic-ai/claude-code USER jaiph -ENV HOME=/home/jaiph -ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" # cursor-agent (Cursor) — install as the runtime user so the binary remains # reachable after switching away from root. The installer currently places @@ -126,16 +106,13 @@ RUN mkdir -p "$HOME/.local/bin" && \ command -v cursor-agent >/dev/null 2>&1 && \ rm -f /tmp/install-cursor-agent.sh -# jaiph (official installer: https://jaiph.org/install) -RUN curl -fsSL https://jaiph.org/install | bash - # Add project-specific package managers/build tools below as needed. WORKDIR /jaiph/workspace EOF if ! 
cmp -s "${TEST_DIR}/.jaiph/Dockerfile" "${expected_dockerfile}"; then rm -f "${expected_dockerfile}" - e2e::fail "Expected .jaiph/Dockerfile to match init template with jaiph.org installer" + e2e::fail "Expected .jaiph/Dockerfile to match init template extending official GHCR image" fi rm -f "${expected_dockerfile}" e2e::pass ".jaiph/Dockerfile matches expected init template content" diff --git a/e2e/tests/72_docker_run_artifacts.sh b/e2e/tests/72_docker_run_artifacts.sh index face0baa..c461002d 100755 --- a/e2e/tests/72_docker_run_artifacts.sh +++ b/e2e/tests/72_docker_run_artifacts.sh @@ -16,6 +16,13 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then exit 0 fi +# Build a local test image with jaiph installed from current source. +if ! e2e::ensure_docker_test_image; then + e2e::section "docker run artifacts (skipped — test image build failed)" + e2e::skip "Could not build local Docker test image" + exit 0 +fi + e2e::section "docker run artifacts — happy path" # Given: a simple workflow that produces stdout artifacts @@ -32,9 +39,9 @@ workflow default() { } EOF -# When: run with Docker enabled (override the e2e default of JAIPH_DOCKER_ENABLED=false) -if ! JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/docker_artifacts.jh" >/dev/null 2>&1; then - JAIPH_DOCKER_ENABLED=true jaiph run "${TEST_DIR}/docker_artifacts.jh" +# When: run with Docker enabled using the E2E test image +if ! 
JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/docker_artifacts.jh" >/dev/null 2>&1; then + JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/docker_artifacts.jh" e2e::fail "docker: jaiph run docker_artifacts.jh failed" fi @@ -77,7 +84,7 @@ EOF rm -rf "${TEST_DIR}/custom_runs" # When: run with Docker and relative JAIPH_RUNS_DIR -(cd "${TEST_DIR}" && JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="custom_runs" jaiph run "${TEST_DIR}/docker_rel_runs.jh" >/dev/null 2>&1) +(cd "${TEST_DIR}" && JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="custom_runs" jaiph run "${TEST_DIR}/docker_rel_runs.jh" >/dev/null 2>&1) # Then: artifacts should be under the relative dir on host rel_run_dir="$(e2e::run_dir_at "${TEST_DIR}/custom_runs" "docker_rel_runs.jh")" @@ -105,7 +112,7 @@ abs_runs_dir="${TEST_DIR}/abs_runs" rm -rf "${abs_runs_dir}" # When: run with absolute JAIPH_RUNS_DIR inside workspace -JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="${abs_runs_dir}" jaiph run "${TEST_DIR}/docker_abs_runs.jh" >/dev/null 2>&1 +JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="${abs_runs_dir}" jaiph run "${TEST_DIR}/docker_abs_runs.jh" >/dev/null 2>&1 # Then: artifacts should be under the absolute path on host abs_run_dir="$(e2e::run_dir_at "${abs_runs_dir}" "docker_abs_runs.jh")" @@ -131,9 +138,29 @@ EOF # When/Then: absolute path outside workspace should fail outside_dir="/tmp/jaiph-outside-workspace-test-$$" -if JAIPH_DOCKER_ENABLED=true JAIPH_RUNS_DIR="${outside_dir}" jaiph run "${TEST_DIR}/docker_outside.jh" >/dev/null 2>&1; then +if JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_RUNS_DIR="${outside_dir}" jaiph run "${TEST_DIR}/docker_outside.jh" >/dev/null 2>&1; then rm -rf "${outside_dir}" e2e::fail "docker: absolute JAIPH_RUNS_DIR outside workspace should fail" fi rm -rf "${outside_dir}" 
e2e::pass "docker: absolute JAIPH_RUNS_DIR outside workspace exits non-zero" + +e2e::section "docker run artifacts — image without jaiph fails fast" + +# Given: a workflow and a stock image that does NOT contain jaiph +e2e::file "docker_no_jaiph.jh" <<'EOF' +script greet_impl = ``` +echo "should not run" +``` +workflow default() { + run greet_impl() +} +EOF + +# When/Then: using an image without jaiph should fail with E_DOCKER_NO_JAIPH +error_output="" +if error_output="$(JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE=node:20-bookworm-slim jaiph run "${TEST_DIR}/docker_no_jaiph.jh" 2>&1)"; then + e2e::fail "docker: image without jaiph should fail" +fi +# assert_contains: error message varies by image name and guidance text +e2e::assert_contains "${error_output}" "E_DOCKER_NO_JAIPH" "docker: missing jaiph produces E_DOCKER_NO_JAIPH error" diff --git a/e2e/tests/73_docker_dockerfile_detection.sh b/e2e/tests/73_docker_dockerfile_detection.sh index 2bf3fee2..286e9bd1 100644 --- a/e2e/tests/73_docker_dockerfile_detection.sh +++ b/e2e/tests/73_docker_dockerfile_detection.sh @@ -16,13 +16,33 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then exit 0 fi +# Build the E2E test image (used for explicit-image tests below). +if ! e2e::ensure_docker_test_image; then + e2e::section "docker dockerfile detection (skipped — test image build failed)" + e2e::skip "Could not build local Docker test image" + exit 0 +fi + e2e::section "docker dockerfile detection — custom Dockerfile builds and runs" -# Given: a .jaiph/Dockerfile that produces a minimal image with a marker file +# Given: a .jaiph/Dockerfile that produces an image with jaiph AND a marker file. +# We install jaiph from a local tarball so the custom image satisfies the strict contract. 
mkdir -p "${TEST_DIR}/.jaiph" -cat > "${TEST_DIR}/.jaiph/Dockerfile" <<'DOCKERFILE' -FROM node:20-bookworm + +(cd "${ROOT_DIR}" && npm pack --pack-destination "${TEST_DIR}/.jaiph" >/dev/null 2>&1) +tarball_name="$(ls "${TEST_DIR}/.jaiph"/jaiph-*.tgz 2>/dev/null | head -1 | xargs basename)" + +cat > "${TEST_DIR}/.jaiph/Dockerfile" </dev/null 2>&1 +JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" jaiph run "${TEST_DIR}/dockerfile_skip.jh" >/dev/null 2>&1 -# Then: the marker file should NOT exist (stock pulled image, not custom build) +# Then: the marker file should NOT exist (E2E test image, not custom build) e2e::expect_run_file "dockerfile_skip.jh" "000003-script__check_no_marker_impl.out" "no marker" e2e::pass "docker: explicit image skips .jaiph/Dockerfile" -e2e::section "docker dockerfile detection — fallback without Dockerfile" +e2e::section "docker dockerfile detection — fallback without Dockerfile uses configured image" -# Given: a separate test dir without .jaiph/Dockerfile +# Given: a separate test dir without .jaiph/Dockerfile, using the E2E test image fallback_dir="$(mktemp -d "${JAIPH_E2E_WORK_DIR}/docker_fallback.XXXXXX")" cat > "${fallback_dir}/fallback.jh" <<'EOF' script greet_impl = ``` @@ -90,14 +110,14 @@ workflow default() { } EOF -# When: run with Docker enabled but no .jaiph/Dockerfile present -JAIPH_DOCKER_ENABLED=true JAIPH_WORKSPACE="${fallback_dir}" jaiph run "${fallback_dir}/fallback.jh" >/dev/null 2>&1 +# When: run with Docker enabled and explicit E2E image (no .jaiph/Dockerfile present) +JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" JAIPH_WORKSPACE="${fallback_dir}" jaiph run "${fallback_dir}/fallback.jh" >/dev/null 2>&1 -# Then: should use default Node image (bash + node for JS kernel) and succeed +# Then: should succeed using the configured image fallback_run_dir="$(e2e::run_dir_at "${fallback_dir}/.jaiph/runs" "fallback.jh")" fallback_summary="${fallback_run_dir}run_summary.jsonl" 
e2e::assert_file_exists "${fallback_summary}" "docker: fallback run_summary.jsonl exists" -e2e::pass "docker: falls back to default image without .jaiph/Dockerfile" +e2e::pass "docker: falls back to configured image without .jaiph/Dockerfile" e2e::section "docker dockerfile detection — agent env vars are forwarded" @@ -118,6 +138,7 @@ EOF # When: run with agent env vars set on host JAIPH_DOCKER_ENABLED=true \ + JAIPH_DOCKER_IMAGE="${E2E_DOCKER_TEST_IMAGE}" \ ANTHROPIC_API_KEY="test-key-123" \ CURSOR_SESSION="test-session-456" \ jaiph run "${TEST_DIR}/envforward.jh" >/dev/null 2>&1 diff --git a/e2e/tests/74_docker_lifecycle.sh b/e2e/tests/74_docker_lifecycle.sh index c2dd9dcb..54f2bbee 100755 --- a/e2e/tests/74_docker_lifecycle.sh +++ b/e2e/tests/74_docker_lifecycle.sh @@ -16,6 +16,13 @@ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then exit 0 fi +# Build a local test image with jaiph installed from current source. +if ! e2e::ensure_docker_test_image; then + e2e::section "docker lifecycle (skipped — test image build failed)" + e2e::skip "Could not build local Docker test image" + exit 0 +fi + # --------------------------------------------------------------------------- # Early container exit / failed startup path # --------------------------------------------------------------------------- @@ -39,7 +46,7 @@ EOF # When: run with Docker enabled — the container should fail and jaiph should # exit promptly (within 30 seconds), not hang in RUNNING. -if timeout 30 bash -c "JAIPH_DOCKER_ENABLED=true jaiph run '${TEST_DIR}/early_exit.jh' >/dev/null 2>&1"; then +if timeout 30 bash -c "JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE='${E2E_DOCKER_TEST_IMAGE}' jaiph run '${TEST_DIR}/early_exit.jh' >/dev/null 2>&1"; then e2e::fail "docker: early_exit.jh should have failed but exited 0" fi exit_code=$? @@ -77,7 +84,7 @@ workflow default() { EOF # When: run with Docker enabled -if ! 
timeout 60 bash -c "JAIPH_DOCKER_ENABLED=true jaiph run '${TEST_DIR}/stream_check.jh' >/dev/null 2>&1"; then +if ! timeout 60 bash -c "JAIPH_DOCKER_ENABLED=true JAIPH_DOCKER_IMAGE='${E2E_DOCKER_TEST_IMAGE}' jaiph run '${TEST_DIR}/stream_check.jh' >/dev/null 2>&1"; then e2e::fail "docker: stream_check.jh failed" fi diff --git a/src/cli/commands/init.test.ts b/src/cli/commands/init.test.ts index e411f5db..af7fdd21 100644 --- a/src/cli/commands/init.test.ts +++ b/src/cli/commands/init.test.ts @@ -50,17 +50,16 @@ test("init: generated bootstrap uses triple-quoted prompt and parses", () => { } }); -test("init: creates .jaiph/Dockerfile with jaiph installer", () => { +test("init: creates .jaiph/Dockerfile extending official GHCR image", () => { const dir = makeTempDir(); try { assert.equal(runInit([dir]), 0); const dockerfilePath = join(dir, ".jaiph", "Dockerfile"); assert.equal(existsSync(dockerfilePath), true); const dockerfile = readFileSync(dockerfilePath, "utf8"); - assert.equal(dockerfile.includes("FROM ubuntu:latest"), true); - assert.equal(dockerfile.includes("ca-certificates"), true); - assert.equal(dockerfile.includes("setup_lts.x"), true); - assert.equal(dockerfile.includes(JAIPH_INSTALL_COMMAND), true); + assert.equal(dockerfile.includes("ghcr.io/jaiphlang/jaiph-runtime"), true); + assert.equal(dockerfile.includes("cursor"), true); + assert.equal(dockerfile.includes("claude-code"), true); } finally { rmSync(dir, { recursive: true, force: true }); } diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index d777bccb..43b16865 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -37,40 +37,20 @@ workflow default() { const JAIPH_DIR_GITIGNORE_TEMPLATE = "runs\ntmp\n"; const DOCKERFILE_TEMPLATE_MARKER = "# Generated by jaiph init for project sandboxing."; const JAIPH_INSTALL_COMMAND = "curl -fsSL https://jaiph.org/install | bash"; -const JAIPH_DOCKERFILE_TEMPLATE = `FROM ubuntu:latest +const JAIPH_DOCKERFILE_TEMPLATE = `# Extends 
the official jaiph runtime image with agent CLIs for prompt steps. +# The base image already contains Node.js, jaiph, and fuse-overlayfs. +# For a minimal image without agent CLIs, use ghcr.io/jaiphlang/jaiph-runtime directly. +FROM ghcr.io/jaiphlang/jaiph-runtime:nightly ${DOCKERFILE_TEMPLATE_MARKER} # Keep this file aligned with your repository's runtime/build/test needs. -# Standard utilities + fuse-overlayfs for CoW sandbox -RUN apt-get update && \\ - apt-get install -y --no-install-recommends \\ - bash \\ - curl \\ - git \\ - ca-certificates \\ - gnupg \\ - fuse-overlayfs \\ - fuse3 \\ - rsync && \\ - rm -rf /var/lib/apt/lists/* - -# Node.js latest LTS (required by jaiph prompt stream helpers) -RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \\ - apt-get install -y --no-install-recommends nodejs && \\ - rm -rf /var/lib/apt/lists/* - -# Non-root user keeps agent CLIs happy in Docker mode. -RUN useradd --create-home --uid 10001 --shell /bin/bash jaiph && \\ - mkdir -p /jaiph/workspace /jaiph/workspace-ro /jaiph/run && \\ - chown -R jaiph:jaiph /jaiph +USER root # Claude Code CLI (Anthropic) RUN npm install -g @anthropic-ai/claude-code USER jaiph -ENV HOME=/home/jaiph -ENV PATH="/home/jaiph/.local/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" # cursor-agent (Cursor) — install as the runtime user so the binary remains # reachable after switching away from root. The installer currently places @@ -89,9 +69,6 @@ RUN mkdir -p "$HOME/.local/bin" && \\ command -v cursor-agent >/dev/null 2>&1 && \\ rm -f /tmp/install-cursor-agent.sh -# jaiph (official installer: https://jaiph.org/install) -RUN ${JAIPH_INSTALL_COMMAND} - # Add project-specific package managers/build tools below as needed. 
WORKDIR /jaiph/workspace diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts index 9afbe728..c9d4ef7a 100644 --- a/src/runtime/docker.test.ts +++ b/src/runtime/docker.test.ts @@ -13,6 +13,8 @@ import { writeOverlayScript, resolveImage, buildImageFromDockerfile, + verifyImageHasJaiph, + GHCR_IMAGE_REPO, type MountSpec, type DockerRunConfig, type DockerSpawnOptions, @@ -137,7 +139,7 @@ test("parseMounts: throws when no workspace mount", () => { test("resolveDockerConfig: defaults when no in-file and no env", () => { const cfg = resolveDockerConfig(undefined, {}); assert.equal(cfg.enabled, false); - assert.equal(cfg.image, "node:20-bookworm"); + assert.ok(cfg.image.startsWith(GHCR_IMAGE_REPO + ":"), `default image should be GHCR: ${cfg.image}`); assert.equal(cfg.network, "default"); assert.equal(cfg.timeout, 300); assert.equal(cfg.mounts.length, 1); @@ -505,6 +507,14 @@ test("resolveDockerConfig: imageExplicit is true when in-file sets image", () => assert.equal(cfg.image, "alpine:3.19"); }); +// --------------------------------------------------------------------------- +// GHCR_IMAGE_REPO +// --------------------------------------------------------------------------- + +test("GHCR_IMAGE_REPO: points to official registry", () => { + assert.equal(GHCR_IMAGE_REPO, "ghcr.io/jaiphlang/jaiph-runtime"); +}); + // --------------------------------------------------------------------------- // resolveImage // --------------------------------------------------------------------------- @@ -540,3 +550,25 @@ test("resolveImage: skips Dockerfile when imageExplicit is true", () => { rmSync(tmpDir, { recursive: true, force: true }); } }); + +// --------------------------------------------------------------------------- +// Strict contract: no auto-build, no npm pack bootstrap +// --------------------------------------------------------------------------- + +test("docker.ts: no auto-build or npm-pack bootstrap code", () => { + const src = readFileSync(join(__dirname, 
"docker.ts"), "utf8"); + assert.ok(!src.includes("npm pack"), "docker.ts must not contain npm pack"); + assert.ok(!src.includes("npm install -g"), "docker.ts must not contain npm install -g"); + assert.ok(!src.includes("jaiph-runtime-auto"), "docker.ts must not reference auto-derived image tag"); + assert.ok(!src.includes("ensureLocalRuntimeImage"), "docker.ts must not contain ensureLocalRuntimeImage"); + assert.ok(!src.includes("buildRuntimeImageFromLocalPackage"), "docker.ts must not contain buildRuntimeImageFromLocalPackage"); +}); + +test("verifyImageHasJaiph: throws E_DOCKER_NO_JAIPH with guidance for missing jaiph", () => { + // Unit-test the error message structure without running Docker. + // verifyImageHasJaiph uses imageHasJaiph internally which spawns Docker, + // so we test the error message format by checking the source contract. + const src = readFileSync(join(__dirname, "docker.ts"), "utf8"); + assert.ok(src.includes("E_DOCKER_NO_JAIPH"), "verifyImageHasJaiph must use E_DOCKER_NO_JAIPH error code"); + assert.ok(src.includes(GHCR_IMAGE_REPO), "error message must reference official GHCR image"); +}); diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index 400d6f4a..83541724 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -1,6 +1,5 @@ import { execFileSync, execSync, spawn, ChildProcess } from "node:child_process"; -import { createHash } from "node:crypto"; -import { existsSync, mkdirSync, mkdtempSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, statSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join, resolve, dirname, relative } from "node:path"; import type { RuntimeConfig } from "../types"; @@ -81,10 +80,25 @@ export function validateMounts(mounts: MountSpec[]): void { // Config resolution (env > in-file > defaults) // 
--------------------------------------------------------------------------- +/** Read the package version to derive the default GHCR image tag. */ +function resolveDefaultImageTag(): string { + try { + const pkgPath = resolve(__dirname, "..", "..", "..", "package.json"); + const pkg = JSON.parse(readFileSync(pkgPath, "utf8")); + if (pkg.version && typeof pkg.version === "string") { + return pkg.version; + } + } catch { + // Fall through to nightly. + } + return "nightly"; +} + +export const GHCR_IMAGE_REPO = "ghcr.io/jaiphlang/jaiph-runtime"; + const DEFAULTS: DockerRunConfig = { enabled: false, - /** Node + bash; required for JS kernel (run-step-exec) inside the container. */ - image: "node:20-bookworm", + image: `${GHCR_IMAGE_REPO}:${resolveDefaultImageTag()}`, imageExplicit: false, network: "default", timeout: 300, @@ -175,7 +189,6 @@ export function pullImageIfNeeded(image: string): void { // --------------------------------------------------------------------------- const DOCKERFILE_IMAGE_TAG = "jaiph-runtime:latest"; -const AUTO_RUNTIME_IMAGE_REPO = "jaiph-runtime-auto"; /** * Build a Docker image from a Dockerfile and tag it. @@ -194,26 +207,6 @@ export function buildImageFromDockerfile(dockerfilePath: string, tag: string = D return tag; } -function installedPackageRoot(): string { - return resolve(__dirname, "..", "..", ".."); -} - -function autoRuntimeImageTag(baseImage: string, packageRoot: string): string { - const packageJsonPath = join(packageRoot, "package.json"); - const cliPath = join(packageRoot, "dist", "src", "cli.js"); - const dockerRuntimePath = join(packageRoot, "dist", "src", "runtime", "docker.js"); - const nodeWorkflowRuntimePath = join(packageRoot, "dist", "src", "runtime", "kernel", "node-workflow-runtime.js"); - const packageStamp = existsSync(packageJsonPath) ? statSync(packageJsonPath).mtimeMs : 0; - const cliStamp = existsSync(cliPath) ? statSync(cliPath).mtimeMs : 0; - const dockerRuntimeStamp = existsSync(dockerRuntimePath) ? 
statSync(dockerRuntimePath).mtimeMs : 0; - const nodeWorkflowRuntimeStamp = existsSync(nodeWorkflowRuntimePath) ? statSync(nodeWorkflowRuntimePath).mtimeMs : 0; - const digest = createHash("sha256") - .update(`${baseImage}|${resolve(packageRoot)}|${packageStamp}|${cliStamp}|${dockerRuntimeStamp}|${nodeWorkflowRuntimeStamp}`) - .digest("hex") - .slice(0, 12); - return `${AUTO_RUNTIME_IMAGE_REPO}:${digest}`; -} - function imageHasJaiph(image: string): boolean { try { execFileSync( @@ -227,103 +220,17 @@ function imageHasJaiph(image: string): boolean { } } -function imageConfiguredUser(image: string): string | undefined { - try { - const raw = execFileSync( - "docker", - ["image", "inspect", image, "--format", "{{json .Config.User}}"], - { encoding: "utf8", timeout: 30_000 }, - ).trim(); - const parsed = JSON.parse(raw) as string; - return parsed.length > 0 ? parsed : undefined; - } catch { - return undefined; - } -} - -function imageHomeDir(image: string): string | undefined { - try { - const raw = execFileSync( - "docker", - ["image", "inspect", image, "--format", "{{json .Config.Env}}"], - { encoding: "utf8", timeout: 30_000 }, - ).trim(); - const envList = JSON.parse(raw) as string[]; - for (const entry of envList) { - if (entry.startsWith("HOME=")) { - const value = entry.slice("HOME=".length); - return value.length > 0 ? value : undefined; - } - } - } catch { - // Fall through. 
- } - return undefined; -} - -function buildRuntimeImageFromLocalPackage(baseImage: string, packageRoot: string, tag: string): string { - const contextDir = mkdtempSync(join(tmpdir(), "jaiph-runtime-image-")); - try { - const tarballName = execFileSync( - "npm", - ["pack", packageRoot, "--silent", "--pack-destination", contextDir], - { cwd: packageRoot, encoding: "utf8", timeout: 300_000 }, - ).trim().split(/\r?\n/).pop()?.trim(); - if (!tarballName) { - throw new Error("npm pack produced no tarball"); - } - const originalUser = imageConfiguredUser(baseImage); - const originalHome = imageHomeDir(baseImage); - writeFileSync( - join(contextDir, "Dockerfile"), - [ - `FROM ${baseImage}`, - `USER root`, - `COPY ${tarballName} /tmp/${tarballName}`, - `RUN npm install -g /tmp/${tarballName} && rm -f /tmp/${tarballName}` + - (originalHome - ? ` && JAIPH_NPM_BIN="$(npm prefix -g)/bin/jaiph" && mkdir -p ${originalHome}/.local/bin && ln -sf "$JAIPH_NPM_BIN" ${originalHome}/.local/bin/jaiph` - : ""), - ...(originalUser ? [`USER ${originalUser}`] : []), - "", - ].join("\n"), +/** + * Verify that the selected Docker image contains `jaiph`. + * Fails fast with an actionable error when the binary is missing. + */ +export function verifyImageHasJaiph(image: string): void { + if (!imageHasJaiph(image)) { + throw new Error( + `E_DOCKER_NO_JAIPH the Docker image "${image}" does not contain a jaiph CLI. ` + + `Use the official runtime image (${GHCR_IMAGE_REPO}:) or install jaiph ` + + `in your custom image. 
See https://jaiph.org/sandboxing for details.`, ); - execFileSync("docker", ["build", "-t", tag, contextDir], { - stdio: "inherit", - timeout: 600_000, - }); - return tag; - } catch { - throw new Error(`E_DOCKER_BUILD failed to build runtime image from base "${baseImage}"`); - } finally { - rmSync(contextDir, { recursive: true, force: true }); - } -} - -function ensureLocalRuntimeImage(baseImage: string): string { - pullImageIfNeeded(baseImage); - const packageRoot = installedPackageRoot(); - const tag = autoRuntimeImageTag(baseImage, packageRoot); - try { - execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 }); - return tag; - } catch { - return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag); - } -} - -function ensureImageHasJaiph(baseImage: string): string { - pullImageIfNeeded(baseImage); - if (imageHasJaiph(baseImage)) { - return baseImage; - } - const packageRoot = installedPackageRoot(); - const tag = autoRuntimeImageTag(baseImage, packageRoot); - try { - execSync(`docker image inspect ${tag}`, { stdio: "ignore", timeout: 30_000 }); - return tag; - } catch { - return buildRuntimeImageFromLocalPackage(baseImage, packageRoot, tag); } } @@ -332,19 +239,26 @@ function ensureImageHasJaiph(baseImage: string): string { * * When the image was not explicitly configured (`imageExplicit === false`), * checks for `.jaiph/Dockerfile` in the workspace root. If present, builds - * from it and returns the built image tag. Otherwise falls back to the - * configured (default) image and pulls it if needed. + * from it and verifies jaiph is present. Otherwise uses the configured + * (default) image — the official GHCR runtime image — and pulls if needed. + * + * All images are verified to contain `jaiph` before use. If the image + * lacks jaiph, the run fails immediately with guidance. 
*/ export function resolveImage(config: DockerRunConfig, workspaceRoot: string): string { - let baseImage = config.image; + let image = config.image; if (!config.imageExplicit) { const dockerfilePath = join(workspaceRoot, ".jaiph", "Dockerfile"); if (existsSync(dockerfilePath)) { - baseImage = buildImageFromDockerfile(dockerfilePath); + image = buildImageFromDockerfile(dockerfilePath); + } else { + pullImageIfNeeded(image); } - return ensureLocalRuntimeImage(baseImage); + } else { + pullImageIfNeeded(image); } - return ensureImageHasJaiph(baseImage); + verifyImageHasJaiph(image); + return image; } // --------------------------------------------------------------------------- From c5e20c043f8787b33454823b7d6a264d37591d40 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 22:01:26 +0200 Subject: [PATCH 06/38] Feat: Add optional module manifest keys to config block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support module.name, module.version, and module.description as optional string keys in the module-level config { } block. Values are stored on WorkflowMetadata.module as descriptive metadata only — they do not affect agent, run, or runtime behavior. Workflow-level config blocks reject module.* keys with E_PARSE, consistent with the existing runtime.* guard. The formatter round-trips all three keys. Unit tests cover happy path, partial keys, coexistence, round-trip, and workflow-level rejection. Docs and grammar updated. 
Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + QUEUE.md | 47 ------------------ docs/configuration.md | 30 +++++++++++- docs/grammar.md | 5 +- docs/index.html | 4 +- src/format/emit.ts | 14 ++++++ src/parse/metadata.ts | 21 ++++++++ src/parse/parse-metadata.test.ts | 82 ++++++++++++++++++++++++++++++++ src/parse/workflows.ts | 6 +++ src/types.ts | 1 + 10 files changed, 159 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd64875c..ea08d0e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`). - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. 
Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:<version>` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:<version>` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`). - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`.
The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`). diff --git a/QUEUE.md b/QUEUE.md index e6ffada4..7c992336 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,53 +12,6 @@ Process rules: *** -## Support optional config properties in Jaiph DSL: version, name, description. #dev-ready - -**Goal** - -Add optional module-scoped manifest fields in the module-level `config { }` block so a `.jh` file can declare human-readable **name**, **version**, and **description** without changing agent/run/runtime execution. - -**Keys (dot-separated, string values)** - -* `module.name` -* `module.version` -* `module.description` - -All optional; omitted keys leave the corresponding field unset. 
- -**Semantics** - -* Values use the same double-quoted string rules as other config strings (existing escapes). No semver validation in v1 unless a later task adds it. -* **Module-level only:** `module.*` keys must not appear in workflow-level `config { }` blocks. After parsing, reject workflow-level config that sets any `module.*` key, using the same pattern as the existing `runtime.*` workflow guard in `src/parse/workflows.ts`. -* Stored on `WorkflowMetadata` as descriptive metadata only. They do **not** map into `JaiphConfig`, environment resolution, or the Node workflow runtime unless a future task wires them (e.g. MCP tool metadata). - -**Implementation touchpoints** - -* `src/parse/metadata.ts` — `ALLOWED_KEYS`, `KEY_TYPES`, `assignConfigKey`. -* `src/types.ts` — optional `module?: { name?: string; version?: string; description?: string }` on `WorkflowMetadata`. -* `src/format/emit.ts` — formatter round-trip for the new keys. -* `src/parse/workflows.ts` — workflow-level rejection for `module.*` (mirror `metadata.runtime`). -* Tests: `src/parse/parse-metadata.test.ts`; update parse-error golden/txtar cases if the unknown-key allowed-list appears in expectations. -* Docs: `docs/configuration.md`, `docs/grammar.md` (`config_key`). - -**Non-goals** - -* Environment variables, CLI output, or runtime behavior changes beyond parsing/formatting/validation. - -**Queue coordination** - -* No conflict with the queued `jaiph serve` MCP task; future work may read `module.description` for tool listings. - -**Acceptance criteria** - -* Module-level `config` accepts `module.name`, `module.version`, and `module.description`; values round-trip through `jaiph format`. -* Workflow-level `config` containing any `module.*` assignment fails with an explicit error (consistent with `runtime.*` workflow rules). -* Unit tests cover happy path and workflow rejection; docs and grammar list the keys. 
- -**Scope note** - -* Expect more than three files (parser, types, formatter, workflows guard, tests, docs); keep the existing plain `assignConfigKey` style — no new abstraction layers. - ## Runtime — harden Docker execution environment #dev-ready **Goal** diff --git a/docs/configuration.md b/docs/configuration.md index ee6d7c1a..faa039f6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -87,7 +87,7 @@ workflow default() { **Rules:** - At most one per workflow; it must be the first non-comment construct in the body. A duplicate is `E_PARSE`: `duplicate config block inside workflow (only one allowed per workflow)`. -- Only **`agent.*` and `run.*` keys** are allowed. Any `runtime.*` key is `E_PARSE`. +- Only **`agent.*` and `run.*` keys** are allowed. Any `runtime.*` or `module.*` key is `E_PARSE`. - Workflow-level values apply to all steps in that workflow, including `ensure`d rules and scripts called from it. When the workflow finishes, the previous environment is restored. **Sibling isolation:** Each workflow gets its own clone of the parent environment. Sibling workflows never see each other's config — even when they execute sequentially. If workflow `alpha` sets `agent.backend = "claude"` and workflow `beta` only sets `agent.default_model = "beta-model"`, `beta` still sees the module-level backend (e.g. `"cursor"`), not `alpha`'s. @@ -137,6 +137,31 @@ These control runtime behavior unrelated to the agent. | `run.debug` | boolean | `false` | `JAIPH_DEBUG` | Enables debug tracing for the run. | | `run.inbox_parallel` | boolean | `false` | `JAIPH_INBOX_PARALLEL` | Dispatch inbox route targets concurrently. See [Inbox — Parallel dispatch](inbox.md#parallel-dispatch). | +### Module keys + +Optional descriptive metadata about the workflow module. These are informational only — they do not affect agent, run, or runtime behavior. Future features (e.g. MCP tool metadata) may consume them. 
+ +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `module.name` | string | _(unset)_ | Human-readable name for this module. | +| `module.version` | string | _(unset)_ | Version string (no validation — any quoted string is accepted). | +| `module.description` | string | _(unset)_ | Short description of what this module does. | + +Module keys can only appear in **module-level** config blocks. Any `module.*` key inside a workflow-level config is `E_PARSE`. + +```jh +config { + module.name = "deploy-pipeline" + module.version = "2.0.0" + module.description = "Production deployment with rollback" + agent.backend = "claude" +} + +workflow default() { + log "deploying..." +} +``` + ### Runtime keys (Docker sandbox — beta) These configure Docker sandboxing. Unlike agent and run keys, runtime keys are resolved by the `jaiph run` CLI at launch — not by the workflow runtime. They can only appear in **module-level** config blocks (not workflow-level). @@ -303,6 +328,9 @@ Quick reference for all in-file keys and their environment variable equivalents: | `runtime.docker_network` | `JAIPH_DOCKER_NETWORK` | | `runtime.docker_timeout` | `JAIPH_DOCKER_TIMEOUT` | | `runtime.workspace` | _(no env override)_ | +| `module.name` | _(no env override)_ | +| `module.version` | _(no env override)_ | +| `module.description` | _(no env override)_ | ## Inspecting effective config at runtime diff --git a/docs/grammar.md b/docs/grammar.md index d3707263..521355d8 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -793,7 +793,8 @@ config_line = config_key "=" config_value ; config_key = "agent.default_model" | "agent.command" | "agent.backend" | "agent.trusted_workspace" | "agent.cursor_flags" | "agent.claude_flags" | "run.logs_dir" | "run.debug" | "run.inbox_parallel" | "runtime.docker_enabled" | "runtime.docker_image" | "runtime.docker_network" - | "runtime.docker_timeout" | "runtime.workspace" ; + | "runtime.docker_timeout" | "runtime.workspace" + | 
"module.name" | "module.version" | "module.description" ; config_value = string | "true" | "false" | integer | string_array ; integer = digit { digit } ; string_array = "[" { array_element } "]" ; @@ -824,7 +825,7 @@ workflow_decl = [ "export" ] "workflow" IDENT [ "(" param_list ")" ] "{" [ wor param_list = IDENT { "," IDENT } ; (* identifiers; no duplicates; no reserved keywords *) workflow_config = config_block ; (* optional per-workflow override; must appear before steps; - only agent.* and run.* keys allowed; runtime.* yields E_PARSE *) + only agent.* and run.* keys allowed; runtime.* and module.* yield E_PARSE *) workflow_step = ensure_stmt | run_stmt | run_catch_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt | const_decl_step | return_stmt diff --git a/docs/index.html b/docs/index.html index 921f590d..58771788 100644 --- a/docs/index.html +++ b/docs/index.html @@ -503,9 +503,9 @@

Syntax

Jaiph workflows

config { ... }
-
Optional runtime options (agent backend/flags, logs, runtime). Allowed at the top level +
Optional runtime options (agent backend/flags, logs, runtime, module metadata). Allowed at the top level (module-wide) and inside individual workflows (per-workflow overrides for agent.* - and run.* keys). Environment variables override config values. See run.* keys only; runtime.* and module.* are module-level only). Environment variables override config values. See Configuration.
import "file.jh" as alias · const name = value / diff --git a/src/format/emit.ts b/src/format/emit.ts index 20f02a35..484488a4 100644 --- a/src/format/emit.ts +++ b/src/format/emit.ts @@ -182,6 +182,15 @@ function emitConfigKeyLines(meta: WorkflowMetadata, key: string, pad: string): s ws.push(`${pad}]`); return ws; } + case "module.name": + if (meta.module?.name === undefined) return []; + return [`${pad}module.name = "${meta.module.name}"`]; + case "module.version": + if (meta.module?.version === undefined) return []; + return [`${pad}module.version = "${meta.module.version}"`]; + case "module.description": + if (meta.module?.description === undefined) return []; + return [`${pad}module.description = "${meta.module.description}"`]; default: return []; } @@ -230,6 +239,11 @@ function emitConfig(meta: WorkflowMetadata, pad: string): string { } } } + if (meta.module) { + if (meta.module.name !== undefined) lines.push(`${pad}module.name = "${meta.module.name}"`); + if (meta.module.version !== undefined) lines.push(`${pad}module.version = "${meta.module.version}"`); + if (meta.module.description !== undefined) lines.push(`${pad}module.description = "${meta.module.description}"`); + } lines.push("}"); return lines.join("\n"); } diff --git a/src/parse/metadata.ts b/src/parse/metadata.ts index ed5f9d8f..0b100024 100644 --- a/src/parse/metadata.ts +++ b/src/parse/metadata.ts @@ -17,6 +17,9 @@ const ALLOWED_KEYS = new Set([ "runtime.docker_network", "runtime.docker_timeout", "runtime.workspace", + "module.name", + "module.version", + "module.description", ]); /** Expected value type for each key that needs type validation. 
*/ @@ -35,6 +38,9 @@ const KEY_TYPES: Record = "runtime.docker_network": "string", "runtime.docker_timeout": "number", "runtime.workspace": "string[]", + "module.name": "string", + "module.version": "string", + "module.description": "string", }; function parseMetadataValue(filePath: string, rawLine: string, valuePart: string, lineNo: number): string | boolean | number | string[] { @@ -222,6 +228,21 @@ function assignConfigKey( out.runtime = {}; } out.runtime.workspace = value as string[]; + } else if (key === "module.name") { + if (!out.module) { + out.module = {}; + } + out.module.name = value as string; + } else if (key === "module.version") { + if (!out.module) { + out.module = {}; + } + out.module.version = value as string; + } else if (key === "module.description") { + if (!out.module) { + out.module = {}; + } + out.module.description = value as string; } } diff --git a/src/parse/parse-metadata.test.ts b/src/parse/parse-metadata.test.ts index 8b121adc..e639a202 100644 --- a/src/parse/parse-metadata.test.ts +++ b/src/parse/parse-metadata.test.ts @@ -199,6 +199,74 @@ test("parseConfigBlock: fails on type mismatch (number where string expected)", ); }); +// --------------------------------------------------------------------------- +// Module manifest keys (module.name, module.version, module.description) +// --------------------------------------------------------------------------- + +test("parseConfigBlock: parses module.name, module.version, module.description", () => { + const lines = [ + "config {", + ' module.name = "my-workflow"', + ' module.version = "1.2.3"', + ' module.description = "A helpful workflow"', + "}", + ]; + const { metadata } = parseConfigBlock("test.jh", lines, 0); + assert.equal(metadata.module?.name, "my-workflow"); + assert.equal(metadata.module?.version, "1.2.3"); + assert.equal(metadata.module?.description, "A helpful workflow"); +}); + +test("parseConfigBlock: module keys are optional (partial set)", () => { + const lines = [ + 
"config {", + ' module.name = "only-name"', + "}", + ]; + const { metadata } = parseConfigBlock("test.jh", lines, 0); + assert.equal(metadata.module?.name, "only-name"); + assert.equal(metadata.module?.version, undefined); + assert.equal(metadata.module?.description, undefined); +}); + +test("parseConfigBlock: module keys coexist with other config keys", () => { + const lines = [ + "config {", + ' module.name = "proj"', + ' agent.backend = "claude"', + "}", + ]; + const { metadata } = parseConfigBlock("test.jh", lines, 0); + assert.equal(metadata.module?.name, "proj"); + assert.equal(metadata.agent?.backend, "claude"); +}); + +test("module keys round-trip through formatter", () => { + const src = [ + 'config {', + ' module.name = "my-tool"', + ' module.version = "0.1.0"', + ' module.description = "Does things"', + '}', + '', + 'workflow default() {', + ' log "ok"', + '}', + ].join("\n"); + const mod = parsejaiph(src, "test.jh"); + assert.equal(mod.metadata?.module?.name, "my-tool"); + assert.equal(mod.metadata?.module?.version, "0.1.0"); + assert.equal(mod.metadata?.module?.description, "Does things"); + + // Verify formatter round-trip produces valid source that re-parses identically + const { emitModule } = require("../format/emit"); + const emitted = emitModule(mod); + const reparsed = parsejaiph(emitted, "test.jh"); + assert.equal(reparsed.metadata?.module?.name, "my-tool"); + assert.equal(reparsed.metadata?.module?.version, "0.1.0"); + assert.equal(reparsed.metadata?.module?.description, "Does things"); +}); + // --------------------------------------------------------------------------- // Workflow-level config // --------------------------------------------------------------------------- @@ -264,6 +332,20 @@ test("workflow config: rejects config after steps", () => { ); }); +test("workflow config: rejects module.* keys", () => { + const src = [ + "workflow default() {", + " config {", + ' module.name = "nope"', + " }", + "}", + ].join("\n"); + assert.throws( 
+ () => parsejaiph(src, "test.jh"), + /module\.\* keys are not allowed in workflow-level config/, + ); +}); + test("workflow config: rejects runtime.* keys", () => { const src = [ "workflow default() {", diff --git a/src/parse/workflows.ts b/src/parse/workflows.ts index 7f5cbccb..f2bcadeb 100644 --- a/src/parse/workflows.ts +++ b/src/parse/workflows.ts @@ -142,6 +142,9 @@ export function parseWorkflowBlock( if (metadata.runtime) { fail(filePath, "runtime.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", lineNo); } + if (metadata.module) { + fail(filePath, "module.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", lineNo); + } workflow.metadata = metadata; continue; } @@ -201,6 +204,9 @@ export function parseWorkflowBlock( if (metadata.runtime) { fail(filePath, "runtime.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", innerNo); } + if (metadata.module) { + fail(filePath, "module.* keys are not allowed in workflow-level config (only agent.* and run.* keys)", innerNo); + } workflow.metadata = metadata; idx = nextIndex - 1; continue; diff --git a/src/types.ts b/src/types.ts index 05b48b64..f3696a6c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -307,6 +307,7 @@ export interface WorkflowMetadata { }; run?: { debug?: boolean; logsDir?: string; inboxParallel?: boolean }; runtime?: RuntimeConfig; + module?: { name?: string; version?: string; description?: string }; /** Preserves `#` lines and assignment order inside `config { }` (formatter). */ configBodySequence?: ConfigBodyPart[]; } From d9b3b003cf1b2d07dde84c9908f5e93d3a5129cd Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 22:12:21 +0200 Subject: [PATCH 07/38] Feat: Harden Docker execution environment with least-privilege defaults Add mount denylist rejecting dangerous host paths (/, /proc, /sys, /dev, Docker socket) at validation time with E_VALIDATE_MOUNT. 
Add environment variable denylist (SSH_*, GPG_*, AWS_*, GCP_*, AZURE_*, GOOGLE_*, DOCKER_*, KUBE*, NPM_TOKEN*) preventing host credential leakage into containers. Launch containers with --cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges for least-privilege capability control. Document threat model in docs/sandboxing.md covering what Docker does and does not protect against (hooks on host, network egress, agent credential forwarding, image supply chain, container escapes). Add failure-modes reference table, expanded network-mode guidance, and env denylist spec. Unit tests cover all new validation and filtering paths. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + QUEUE.md | 29 ----------- docs/sandboxing.md | 55 +++++++++++++++++++- src/runtime/docker.test.ts | 104 +++++++++++++++++++++++++++++++++++++ src/runtime/docker.ts | 68 +++++++++++++++++++++++- 5 files changed, 225 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea08d0e0..50e5fffb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. 
New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`). - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`). - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. 
Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`). - **Feature — Language/Runtime:** Explicit nested managed calls in argument position — Call arguments can now contain nested managed calls using `run` or `ensure` keywords explicitly: `run foo(run bar())`, `run foo(ensure rule_bar())`, and `run foo(run \`echo "aaa"\`())`. 
The nested call executes first and its result is passed as a single argument to the outer call. Bare call-like forms in argument position are rejected at compile time: `run foo(bar())` → `E_VALIDATE` with an actionable message telling the user to add `run` or `ensure`. Bare inline script calls in argument position (`run foo(\`echo aaa\`())`) are also rejected with guidance. The explicit capture-then-pass form (`const x = run bar()` followed by `run foo(x)`) remains valid. Bare call-like forms in `const` assignments (`const x = bar()`) are also rejected — use `const x = run bar()`. The formatter round-trips explicit nested forms correctly, including the inline script variant. The runtime evaluates nested managed argument tokens (workflows, scripts, rules, and inline scripts) before passing the result to the outer call. Implementation: validator (`src/transpile/validate.ts` — `validateNestedManagedCallArgs` extended for inline script detection), runtime (`src/runtime/kernel/node-workflow-runtime.ts` — `managed_inline_script` token kind, `parseInlineScriptAt`, `resolveArgsRawSync` fast path), formatter (`src/format/emit.ts` — `parseInlineScriptArg`, inline script formatting in `formatArgs`). Regression tests added for all valid and invalid forms. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/jaiph-skill.md`). diff --git a/QUEUE.md b/QUEUE.md index 7c992336..a1df4264 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,35 +12,6 @@ Process rules: *** -## Runtime — harden Docker execution environment #dev-ready - -**Goal** -Docker mode is the isolation boundary for workflow runs. Harden it: least-privilege mounts, explicit and documented env forwarding (what crosses the container boundary), network defaults, and failure modes when Docker is misconfigured or unavailable — so "Docker on" is a deliberate security posture, not accidental leakage. 
(Image provenance and the official default image belong to the queued **Docker — strict image contract + GHCR** task; this task only documents or tightens runtime-visible pull/verify behavior as needed, without redefining publishing or the default image.) - -**Context** - -* Docker runtime: `src/runtime/docker.ts` (`parseMounts` / `validateMounts`, `resolveDockerConfig`, `buildDockerArgs`, `checkDockerAvailable`, `spawnDockerProcess`); CLI integration: `src/cli/commands/run.ts`. -* Current forwarding: `buildDockerArgs` remaps `JAIPH_WORKSPACE` and `JAIPH_RUNS_DIR`, passes through `JAIPH_*` except `JAIPH_DOCKER_*`, and passes keys prefixed `CURSOR_`, `ANTHROPIC_`, or `CLAUDE_` (see `AGENT_ENV_PREFIXES` in `docker.ts`). Mounts come from resolved `runtime.workspace` plus fixed rw run-dir, ro overlay script, and `--device /dev/fuse`. -* E2E: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh`. -* Config: `runtime.docker_enabled`, `runtime.docker_image`, `runtime.docker_network`, `runtime.docker_timeout`, `runtime.workspace` via `src/config.ts` and metadata parsing. - -**Queue coordination** - -* Land after or together with **Docker — strict image contract + publish official `jaiph-runtime` images to GHCR** so bootstrap removal and default image changes are settled before deep hardening refactors the same code paths. -* Land after or together with **Runtime — credential proxy for Docker mode** so any env allowlist/denylist and `docs/sandboxing.md` text stay consistent with placeholder `ANTHROPIC_*` and host-reachable API base URLs (no real secrets in `-e`). -* The later task **Runtime — default Docker when not CI or unsafe** changes `runtime.docker_enabled` defaults; avoid conflicting precedence — document how hardened Docker behavior interacts with that default once both exist. 
- -**Acceptance criteria** - -* Threat-model notes (short section in `docs/sandboxing.md` or equivalent): what Docker is / is not protecting against (including that hooks run on the host). -* Concrete hardening changes in `docker.ts` / run path (e.g. mount validation, env allowlist or documented denylist aligned with the credential-proxy contract, safer defaults) with unit tests. -* No silent widen of host access without opt-in. -* Document network mode behavior (`runtime.docker_network` / `--network`) and failure modes for missing Docker or failed pulls (`E_DOCKER_*`), extending existing patterns where appropriate. - -**Scope note** - -* `docker.ts` is already large (\~650+ lines); prefer small helpers or one focused sibling module over speculative abstractions. Expect at least `docker.ts`, `docker.test.ts`, and `docs/sandboxing.md`; split follow-ups if the change set outgrows one cycle. - ## Runtime — default Docker when not CI or unsafe #dev-ready **Goal** diff --git a/docs/sandboxing.md b/docs/sandboxing.md index b43d5080..c8648157 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -19,6 +19,25 @@ The runtime executes rules by walking the AST in-process (`NodeWorkflowRuntime.e `jaiph test` executes tests in-process with `NodeTestRunner` and does not use Docker or a separate rule sandbox. +## Threat model + +Docker sandboxing is designed to contain damage from untrusted or semi-trusted workflow scripts. Understanding what it does and does not protect against helps you make informed decisions about when to enable it. + +**What Docker protects against:** + +- **Filesystem access** -- Scripts inside the container cannot read or write arbitrary host paths. The host workspace is mounted read-only; writes go to a tmpfs overlay and are discarded on exit. Only the run-artifacts directory (`/jaiph/run`) persists writes to the host. +- **Process isolation** -- Container processes cannot see or signal host processes. 
The container runs with `--cap-drop ALL` (only `SYS_ADMIN` is re-added for fuse-overlayfs) and `--security-opt no-new-privileges` to prevent privilege escalation. +- **Credential leakage** -- Sensitive host environment variables (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) are never forwarded into the container. Only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. +- **Mount safety** -- The host root filesystem (`/`), Docker socket (`/var/run/docker.sock`, `/run/docker.sock`), and OS internals (`/proc`, `/sys`, `/dev`) cannot be mounted into the container. Attempting to do so produces `E_VALIDATE_MOUNT`. + +**What Docker does NOT protect against:** + +- **Hooks run on the host.** Hook commands in `hooks.json` execute on the host CLI process, not inside the container. A malicious hook definition has full host access. Treat `hooks.json` as trusted configuration. +- **Network egress by default.** Unless `runtime.docker_network` is set to `"none"`, the container has outbound network access via Docker's default bridge. Scripts can reach external services and exfiltrate data through the network. +- **Agent credential forwarding.** `ANTHROPIC_*`, `CLAUDE_*`, and `CURSOR_*` variables are forwarded into the container so agent-backed workflows function. A malicious script can read these from its environment. When the credential-proxy feature lands, these will be replaced by proxy URLs that do not expose raw API keys. +- **Image supply chain.** Jaiph verifies that the selected image contains `jaiph` but does not verify image signatures or provenance. Use trusted registries and pin image digests for production workloads. +- **Container escapes.** Docker is not a security boundary against a determined attacker with kernel exploits. It raises the bar significantly for script-level mischief but is not equivalent to a VM or hardware-level isolation. 
+ ## Docker container isolation > **Beta.** Docker sandboxing is functional but still under active development. Expect rough edges, breaking changes, and incomplete platform coverage. Feedback is welcome at . @@ -80,6 +99,12 @@ Mode must be `ro` or `rw` (otherwise `E_PARSE`). Exactly one mount must target ` Host paths are resolved relative to the workspace root. Each mount is duplicated at the overlay lower-layer path (`/jaiph/workspace-ro/...`) so the overlay wrapper can use it as the read-only source. +The following host paths are rejected at mount validation time with `E_VALIDATE_MOUNT`: + +- `/` (host root filesystem) +- `/var/run/docker.sock`, `/run/docker.sock` (Docker daemon socket) +- `/proc`, `/sys`, `/dev` (OS internals, including subpaths like `/proc/1/root`) + ### Container layout ``` @@ -96,7 +121,7 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run ### Runtime behavior -**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--device /dev/fuse` exposes the FUSE device for the overlay. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user :` maps the container user to the host user. +**Container lifecycle** -- `docker run --rm` launches the container and auto-removes it on exit. `--cap-drop ALL --cap-add SYS_ADMIN` drops all Linux capabilities except `SYS_ADMIN` (required for fuse-overlayfs). `--security-opt no-new-privileges` prevents any process inside the container from gaining additional privileges. `--device /dev/fuse` exposes the FUSE device for the overlay. The pseudo-TTY flag (`-t`) is intentionally omitted: Docker's `-t` merges stderr into stdout, which would break the `__JAIPH_EVENT__` stderr-only live contract. On Linux, `--user :` maps the container user to the host user. 
**stdin** -- The `docker run` process is spawned with stdin set to `ignore` to prevent the Docker CLI from blocking on stdin EOF. @@ -106,12 +131,28 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run **Run artifacts** -- The host CLI mounts the resolved host runs root at `/jaiph/run:rw` inside the container. By default this is `.jaiph/runs` under the workspace; a relative `JAIPH_RUNS_DIR` is resolved under the workspace; an absolute `JAIPH_RUNS_DIR` must stay within the workspace or the run fails with `E_DOCKER_RUNS_DIR`. `JAIPH_RUNS_DIR` is set to `/jaiph/run` inside the container, so the runtime writes artifacts directly into the requested host path. -**Network** -- `"default"` omits `--network` (Docker's default bridge). `"none"` passes `--network none`. Any other value is passed through as-is. +**Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment. **Timeout** -- When `runtime.docker_timeout` is greater than zero, the CLI sends `SIGTERM` to the container process on overrun, followed by `SIGKILL` after a 5-second grace period. The failure message includes `E_TIMEOUT container execution exceeded timeout`. **Image pull** -- If the image is not present locally, `docker pull` runs automatically. Pull failure produces `E_DOCKER_PULL`. +### Failure modes + +Docker-related errors use stable error codes (`E_DOCKER_*`, plus `E_VALIDATE_MOUNT` and `E_TIMEOUT`) for programmatic detection: + +| Error code | Trigger | Behavior | +|------------|---------|----------| +| `E_DOCKER_NOT_FOUND` | `docker info` fails (Docker not installed or daemon not running) | Run exits immediately. No fallback to local execution.
| +| `E_DOCKER_PULL` | `docker pull` fails (network error, image not found, auth failure) | Run exits. Check registry access and image name. | +| `E_DOCKER_BUILD` | `docker build` from `.jaiph/Dockerfile` fails | Run exits. Fix the Dockerfile and retry. | +| `E_DOCKER_NO_JAIPH` | Selected image does not contain a `jaiph` CLI | Run exits with guidance to use the official image or install jaiph. | +| `E_DOCKER_RUNS_DIR` | Absolute `JAIPH_RUNS_DIR` points outside the workspace | Run exits. Use a relative path or an absolute path within the workspace. | +| `E_VALIDATE_MOUNT` | Mount targets a denied host path (`/`, `/proc`, docker socket, etc.) | Run exits before container launch. | +| `E_TIMEOUT` | Container exceeds `runtime.docker_timeout` seconds | Container receives SIGTERM, then SIGKILL after 5s grace period. | + +All failures are deterministic and produce non-zero exit codes. There is no silent fallback from Docker to local execution. + ### Image contract **Every Docker image used by Jaiph must already contain a working `jaiph` CLI.** Jaiph does not auto-install itself into containers at runtime — no derived image builds, no `npm pack` bootstrap. If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and guidance to use the official image or install jaiph in a custom image. @@ -163,6 +204,16 @@ All `JAIPH_*` variables from the host are forwarded into the container, **except - `ANTHROPIC_*` - `CLAUDE_*` +The following prefixes are **never** forwarded, even if present on the host: + +- `SSH_*`, `GPG_*` -- authentication agent sockets and signing keys +- `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*` -- cloud provider credentials +- `DOCKER_*` -- Docker daemon configuration (prevents container-in-container) +- `KUBE*` -- Kubernetes configuration +- `NPM_TOKEN*` -- package registry credentials + +This denylist is enforced in `buildDockerArgs` and cannot be overridden. 
If a workflow needs cloud credentials inside the container, pass them explicitly through `JAIPH_*`-prefixed variables or use a credential proxy. + ### Example A workflow with Docker sandboxing enabled and an extra read-only mount for a `config` directory (using the shorthand form): diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts index c9d4ef7a..e706697d 100644 --- a/src/runtime/docker.test.ts +++ b/src/runtime/docker.test.ts @@ -4,6 +4,7 @@ import { parseMount, parseMounts, validateMounts, + validateMountHostPath, resolveDockerConfig, buildDockerArgs, remapDockerEnv, @@ -14,6 +15,8 @@ import { resolveImage, buildImageFromDockerfile, verifyImageHasJaiph, + isEnvDenied, + ENV_DENYLIST_PREFIXES, GHCR_IMAGE_REPO, type MountSpec, type DockerRunConfig, @@ -572,3 +575,104 @@ test("verifyImageHasJaiph: throws E_DOCKER_NO_JAIPH with guidance for missing ja assert.ok(src.includes("E_DOCKER_NO_JAIPH"), "verifyImageHasJaiph must use E_DOCKER_NO_JAIPH error code"); assert.ok(src.includes(GHCR_IMAGE_REPO), "error message must reference official GHCR image"); }); + +// --------------------------------------------------------------------------- +// validateMountHostPath: dangerous mount rejection +// --------------------------------------------------------------------------- + +test("validateMountHostPath: allows normal workspace path", () => { + assert.doesNotThrow(() => validateMountHostPath("/home/user/project")); +}); + +test("validateMountHostPath: rejects root filesystem", () => { + assert.throws(() => validateMountHostPath("/"), /E_VALIDATE_MOUNT.*root filesystem/); +}); + +test("validateMountHostPath: rejects docker socket", () => { + assert.throws(() => validateMountHostPath("/var/run/docker.sock"), /E_VALIDATE_MOUNT.*denied/); +}); + +test("validateMountHostPath: rejects /proc", () => { + assert.throws(() => validateMountHostPath("/proc"), /E_VALIDATE_MOUNT.*denied/); +}); + +test("validateMountHostPath: rejects /proc subpath", () => { + 
assert.throws(() => validateMountHostPath("/proc/1/root"), /E_VALIDATE_MOUNT.*denied/); +}); + +test("validateMountHostPath: rejects /sys", () => { + assert.throws(() => validateMountHostPath("/sys"), /E_VALIDATE_MOUNT.*denied/); +}); + +test("validateMountHostPath: rejects /dev", () => { + assert.throws(() => validateMountHostPath("/dev"), /E_VALIDATE_MOUNT.*denied/); +}); + +test("validateMountHostPath: rejects /run/docker.sock", () => { + assert.throws(() => validateMountHostPath("/run/docker.sock"), /E_VALIDATE_MOUNT.*denied/); +}); + +// --------------------------------------------------------------------------- +// isEnvDenied: env denylist +// --------------------------------------------------------------------------- + +test("isEnvDenied: blocks SSH_ vars", () => { + assert.equal(isEnvDenied("SSH_AUTH_SOCK"), true); +}); + +test("isEnvDenied: blocks AWS_ vars", () => { + assert.equal(isEnvDenied("AWS_SECRET_ACCESS_KEY"), true); +}); + +test("isEnvDenied: blocks DOCKER_ vars", () => { + assert.equal(isEnvDenied("DOCKER_HOST"), true); +}); + +test("isEnvDenied: blocks GPG_ vars", () => { + assert.equal(isEnvDenied("GPG_AGENT_INFO"), true); +}); + +test("isEnvDenied: blocks KUBE vars", () => { + assert.equal(isEnvDenied("KUBECONFIG"), true); +}); + +test("isEnvDenied: allows JAIPH_ vars", () => { + assert.equal(isEnvDenied("JAIPH_DEBUG"), false); +}); + +test("isEnvDenied: allows ANTHROPIC_ vars", () => { + assert.equal(isEnvDenied("ANTHROPIC_API_KEY"), false); +}); + +test("buildDockerArgs: denied env vars are not forwarded", () => { + const opts = defaultOpts({ + env: { + JAIPH_DEBUG: "true", + SSH_AUTH_SOCK: "/tmp/ssh.sock", + AWS_SECRET_ACCESS_KEY: "secret", + DOCKER_HOST: "unix:///var/run/docker.sock", + }, + }); + const args = buildDockerArgs(opts, TEST_OVERLAY); + assert.ok(args.includes("JAIPH_DEBUG=true"), "allowed JAIPH_ var forwarded"); + assert.ok(!args.some((a) => a.includes("SSH_AUTH_SOCK")), "SSH_ denied"); + assert.ok(!args.some((a) => 
a.includes("AWS_SECRET_ACCESS_KEY")), "AWS_ denied"); + assert.ok(!args.some((a) => a.includes("DOCKER_HOST")), "DOCKER_ denied"); +}); + +// --------------------------------------------------------------------------- +// buildDockerArgs: security flags +// --------------------------------------------------------------------------- + +test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privileges", () => { + const args = buildDockerArgs(defaultOpts(), TEST_OVERLAY); + const capDropIdx = args.indexOf("--cap-drop"); + assert.ok(capDropIdx >= 0, "--cap-drop present"); + assert.equal(args[capDropIdx + 1], "ALL"); + const capAddIdx = args.indexOf("--cap-add"); + assert.ok(capAddIdx >= 0, "--cap-add present"); + assert.equal(args[capAddIdx + 1], "SYS_ADMIN"); + const secOptIdx = args.indexOf("--security-opt"); + assert.ok(secOptIdx >= 0, "--security-opt present"); + assert.equal(args[secOptIdx + 1], "no-new-privileges"); +}); diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index 83541724..06f6df5d 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -61,10 +61,44 @@ export function parseMounts(specs: string[]): MountSpec[] { return mounts; } +/** + * Host paths that must never be bind-mounted into a container. + * Prevents accidental exposure of the Docker daemon, OS internals, or + * the entire root filesystem. + */ +const DENIED_HOST_PATHS = [ + "/var/run/docker.sock", + "/run/docker.sock", + "/proc", + "/sys", + "/dev", +] as const; + +/** + * Validate a single mount's host path against the denylist. + * Rejects exact matches and child paths (e.g. `/proc/1/root`). 
+ */ +export function validateMountHostPath(hostAbsPath: string): void { + const normalized = hostAbsPath.replace(/\/+$/, ""); + if (normalized === "" || normalized === "/") { + throw new Error( + `E_VALIDATE_MOUNT refusing to mount the host root filesystem ("/") into the container`, + ); + } + for (const denied of DENIED_HOST_PATHS) { + if (normalized === denied || normalized.startsWith(denied + "/")) { + throw new Error( + `E_VALIDATE_MOUNT refusing to mount denied host path "${hostAbsPath}" into the container`, + ); + } + } +} + /** * Validate mount list: exactly one mount must target `/jaiph/workspace`. + * Also rejects dangerous host paths. */ -export function validateMounts(mounts: MountSpec[]): void { +export function validateMounts(mounts: MountSpec[], workspaceRoot?: string): void { const workspaceMounts = mounts.filter( (m) => m.containerPath === "/jaiph/workspace" || m.containerPath.replace(/\/+$/, "") === "/jaiph/workspace", ); @@ -74,6 +108,10 @@ export function validateMounts(mounts: MountSpec[]): void { if (workspaceMounts.length > 1) { throw new Error("E_VALIDATE exactly one mount must target /jaiph/workspace, found multiple"); } + for (const mount of mounts) { + const hostAbs = workspaceRoot ? resolve(workspaceRoot, mount.hostPath) : resolve(mount.hostPath); + validateMountHostPath(hostAbs); + } } // --------------------------------------------------------------------------- @@ -363,6 +401,27 @@ export const CONTAINER_WORKSPACE = "/jaiph/workspace"; export const CONTAINER_RUN_DIR = "/jaiph/run"; const AGENT_ENV_PREFIXES = ["CURSOR_", "ANTHROPIC_", "CLAUDE_"] as const; +/** + * Environment variable prefixes that are never forwarded into the container. + * Prevents leaking host credentials that aren't part of the explicit allowlist. + */ +export const ENV_DENYLIST_PREFIXES = [ + "SSH_", + "GPG_", + "AWS_", + "GCP_", + "AZURE_", + "GOOGLE_", + "DOCKER_", + "KUBE", + "NPM_TOKEN", +] as const; + +/** Returns true if `key` matches any denied prefix. 
*/ +export function isEnvDenied(key: string): boolean { + return ENV_DENYLIST_PREFIXES.some((prefix) => key.startsWith(prefix)); +} + /** Resolve the host run-artifacts root for Docker-backed runs. */ export function resolveDockerHostRunsRoot( workspaceRoot: string, @@ -426,6 +485,11 @@ export function overlayMountPath(containerPath: string): string { export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: string): string[] { const args: string[] = ["run", "--rm"]; + // Least-privilege: drop all capabilities, re-add only SYS_ADMIN for fuse-overlayfs + args.push("--cap-drop", "ALL"); + args.push("--cap-add", "SYS_ADMIN"); + args.push("--security-opt", "no-new-privileges"); + args.push("--device", "/dev/fuse"); if (process.platform === "linux") { @@ -445,6 +509,7 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str // Workspace inputs: mounted only at the overlay lower-layer path. for (const mount of opts.config.mounts) { const hostAbs = resolve(opts.workspaceRoot, mount.hostPath); + validateMountHostPath(hostAbs); args.push("-v", `${hostAbs}:${overlayMountPath(mount.containerPath)}:ro`); } @@ -459,6 +524,7 @@ export function buildDockerArgs(opts: DockerSpawnOptions, overlayScriptPath: str for (const [key, value] of Object.entries(containerEnv)) { if (value === undefined) continue; + if (isEnvDenied(key)) continue; if (key.startsWith("JAIPH_") && !key.startsWith("JAIPH_DOCKER_")) { args.push("-e", `${key}=${value}`); } From e08f584e384156fd0d8ed7b0dcb45cd0c93a48e0 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Fri, 17 Apr 2026 22:21:39 +0200 Subject: [PATCH 08/38] Feat: Default Docker sandboxing to on for local development Docker is now enabled by default when neither CI=true nor JAIPH_UNSAFE=true is set in the environment. 
This makes sandboxed execution the safe default for local development while keeping Docker off in CI (where it is typically unavailable or redundant) and when the user explicitly opts out via JAIPH_UNSAFE=true. Precedence: JAIPH_DOCKER_ENABLED env > in-file runtime.docker_enabled > CI/unsafe default rule. The test harness and E2E runner set JAIPH_UNSAFE=true so existing tests continue to run on host. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + QUEUE.md | 21 --------------------- docs/configuration.md | 6 +++--- docs/index.html | 5 +++-- docs/sandboxing.md | 24 ++++++++++++++++++------ e2e/test_all.sh | 1 + package.json | 2 +- src/runtime/docker.test.ts | 24 ++++++++++++++++++++++-- src/runtime/docker.ts | 14 ++++++++++---- 9 files changed, 59 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50e5fffb..052f85e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`). - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. 
Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`). - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. 
MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`). - **Breaking — Docker:** Strict image contract and official GHCR runtime images — Docker mode now enforces a strict contract: every Docker image used by Jaiph must already contain a working `jaiph` CLI. Jaiph no longer auto-builds derived images or bootstraps itself into containers at runtime (no `npm pack`, no `npm install -g` into arbitrary base images). If the selected image lacks `jaiph`, the run fails immediately with `E_DOCKER_NO_JAIPH` and actionable guidance. The default `runtime.docker_image` is now `ghcr.io/jaiphlang/jaiph-runtime:` (matching the installed jaiph version), replacing the previous `node:20-bookworm` default. Official runtime images are published to GHCR: `ghcr.io/jaiphlang/jaiph-runtime:` for release tags, `:nightly` for the nightly branch, and `:latest` as a convenience alias. The official image includes Node.js, jaiph, `fuse-overlayfs`, and a non-root `jaiph` user (UID 10001); it does not include agent CLIs to keep the image minimal. The `jaiph init` Dockerfile template now extends the official image (`FROM ghcr.io/jaiphlang/jaiph-runtime:nightly`) and only adds agent CLIs (Claude Code, cursor-agent), instead of building from `ubuntu:latest` with a full install chain. Removed functions: `ensureLocalRuntimeImage`, `buildRuntimeImageFromLocalPackage`, `autoRuntimeImageTag`, `imageConfiguredUser`, `imageHomeDir`. Added: `verifyImageHasJaiph`, `GHCR_IMAGE_REPO`, `resolveDefaultImageTag`. 
CI: new `.github/workflows/docker-publish.yml` publishes the runtime image on release tags and nightly pushes. Implementation: `src/runtime/docker.ts`, `src/cli/commands/init.ts`, `docker/Dockerfile.runtime`. Unit and E2E tests updated for the strict contract — regression test confirms images without jaiph fail with `E_DOCKER_NO_JAIPH`. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`, `docs/cli.md`, `docs/architecture.md`). diff --git a/QUEUE.md b/QUEUE.md index a1df4264..102f91b6 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,27 +12,6 @@ Process rules: *** -## Runtime — default Docker when not CI or unsafe #dev-ready - -**Goal** -When the user has not opted into "unsafe" local execution, workflows should run in Docker by default. **Default `runtime.docker_enabled` to on** only when **neither** `CI=true` **nor** `JAIPH_UNSAFE=true` is set in the environment. If either is set, default Docker to **off** unless explicitly overridden via `runtime.docker_enabled` / `JAIPH_DOCKER_ENABLED`. - -Introduce **`JAIPH_UNSAFE=true`** as the explicit "run on host / skip Docker default" escape hatch for local development when Docker is unwanted; document it next to `CI`. - -**Context** - -* Config resolution: `src/config.ts` — `resolveDockerConfig()` or equivalent; where `runtime.docker_enabled` default is determined. -* Env precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then CI / unsafe default rule. -* E2E Docker tests: `e2e/tests/72_docker_run_artifacts.sh`, `e2e/tests/73_docker_dockerfile_detection.sh` — may need env setup adjustments. - -**Acceptance criteria** - -* `resolveDockerConfig()` (and any CLI preflight messaging) implements the precedence: explicit `JAIPH_DOCKER_ENABLED` / in-file `runtime.docker_enabled` overrides defaults; then apply CI / unsafe default rule. 
-* Unit tests for env combinations: plain local → Docker default on; `CI=true` → default off; `JAIPH_UNSAFE=true` → default off; both unset with explicit `JAIPH_DOCKER_ENABLED=false` → off. -* `CHANGELOG` + sandboxing / configuration docs updated. - -*** - ## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?) ## Runtime — credential proxy for Docker mode diff --git a/docs/configuration.md b/docs/configuration.md index faa039f6..5a1b2891 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -17,7 +17,7 @@ All execution goes through the Node workflow runtime (`NodeWorkflowRuntime`), wh Jaiph provides three configuration mechanisms. When the same key is set in more than one place, the highest-priority source wins: -1. **Environment variables** — highest priority. `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`, and `JAIPH_DOCKER_*`. +1. **Environment variables** — highest priority. `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`, `JAIPH_DOCKER_*`, and `JAIPH_UNSAFE`. 2. **In-file `config { ... }` blocks** — at module scope and optionally inside a `workflow` body. 3. **Built-in defaults** — lowest priority, used when nothing else sets a value. @@ -170,7 +170,7 @@ These configure Docker sandboxing. Unlike agent and run keys, runtime keys are r | Key | Type | Default | Env variable | Description | |-----|------|---------|--------------|-------------| -| `runtime.docker_enabled` | boolean | `false` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. | +| `runtime.docker_enabled` | boolean | `true` locally; `false` when `CI=true` or `JAIPH_UNSAFE=true` | `JAIPH_DOCKER_ENABLED` | Enable Docker for this run. See [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker) for the default rule. | | `runtime.docker_image` | string | `ghcr.io/jaiphlang/jaiph-runtime:` | `JAIPH_DOCKER_IMAGE` | Image name. 
Must already contain `jaiph`. When unset, Jaiph builds from `.jaiph/Dockerfile` if it exists, otherwise uses the official GHCR image matching the installed jaiph version. | | `runtime.docker_network` | string | `default` | `JAIPH_DOCKER_NETWORK` | Docker network mode. | | `runtime.docker_timeout` | integer | `300` | `JAIPH_DOCKER_TIMEOUT` | Timeout in seconds. Invalid or unparsable values fall back to the default. | @@ -187,7 +187,7 @@ For **agent and run keys**, resolution order (highest wins): 3. **Module-level `config`** — applies to workflows that don't define their own block. 4. **Built-in defaults.** -For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > defaults**. Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys. +For **Docker / `runtime.*` keys**, the `jaiph run` driver merges **`JAIPH_DOCKER_*` env > module-level `runtime.*` > CI/unsafe default rule**. The default rule enables Docker when neither `CI=true` nor `JAIPH_UNSAFE=true` is set (see [Sandboxing -- Enabling Docker](sandboxing.md#enabling-docker)). Mounts (`runtime.workspace`) are never taken from env. Workflow-level config cannot set runtime keys. ### Locked variables diff --git a/docs/index.html b/docs/index.html index 58771788..5c7793bb 100644 --- a/docs/index.html +++ b/docs/index.html @@ -462,8 +462,9 @@

Language

Runtime

-

Docker sandboxing. Enable isolated execution with Docker for stronger containment of - agent and shell actions. Configure in config { runtime.* }. See Docker sandboxing. Workflows run inside Docker by default for local development, providing + filesystem and process isolation for agent and shell actions. Disable with JAIPH_UNSAFE=true + or runtime.docker_enabled = false. See Sandboxing.

Hooks. Attach shell automation to workflow and step lifecycle events via ~/.jaiph/hooks.json or <project>/.jaiph/hooks.json. See "` | Container image. Must already contain `jaiph`. Defaults to the official GHCR runtime image matching the installed jaiph version. | | `runtime.docker_network` | string | `"default"` | Docker network mode. | | `runtime.docker_timeout` | integer | `300` | Max execution time in seconds. `0` disables the timeout. | @@ -78,9 +90,9 @@ Each key is type-checked at parse time. Unknown keys produce `E_PARSE`. #### Environment variable overrides -Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Workspace mounts are not overridable via environment. +Following the `JAIPH_*` convention: `JAIPH_DOCKER_ENABLED`, `JAIPH_DOCKER_IMAGE`, `JAIPH_DOCKER_NETWORK`, `JAIPH_DOCKER_TIMEOUT`. Additionally, `CI` and `JAIPH_UNSAFE` affect the default for `runtime.docker_enabled` (see [Enabling Docker](#enabling-docker)). Workspace mounts are not overridable via environment. -Precedence: environment variable > in-file config > default. +Precedence: environment variable > in-file config > default. For `runtime.docker_enabled`, the default is the CI/unsafe rule (see [Enabling Docker](#enabling-docker)). If `JAIPH_DOCKER_TIMEOUT` is set but not a valid integer, the default (`300`) is used. 
diff --git a/e2e/test_all.sh b/e2e/test_all.sh index 63dd8f66..b3df284b 100755 --- a/e2e/test_all.sh +++ b/e2e/test_all.sh @@ -98,6 +98,7 @@ for script in "${TEST_SCRIPTS[@]}"; do e2e::section "Running ${script_name}" if JAIPH_E2E_SKIP_INSTALL=1 \ + JAIPH_UNSAFE="${JAIPH_UNSAFE:-true}" \ JAIPH_E2E_TMP_DIR="${JAIPH_E2E_TMP_DIR:-}" \ JAIPH_E2E_BIN_DIR="${JAIPH_E2E_BIN_DIR}" \ JAIPH_E2E_WORK_DIR="${JAIPH_E2E_WORK_DIR}" \ diff --git a/package.json b/package.json index ff3cda41..c342a059 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "build:standalone": "npm run build && node -e \"const fs=require('node:fs'); fs.cpSync('dist/src/runtime','dist/runtime',{recursive:true});\" && bun build --compile ./src/cli.ts --outfile ./dist/jaiph", "test:compiler": "npm run build && node --test dist/src/compiler-test-runner.js", "test:golden-ast": "npm run build && node --test dist/src/golden-ast-runner.js", - "test": "npm run clean && npm run build && NODE_OPTIONS='--max-old-space-size=32768 --enable-source-maps' node --test dist/test/*.test.js $(find dist/src -name '*.test.js' -o -name '*.acceptance.test.js') dist/src/compiler-test-runner.js dist/src/golden-ast-runner.js", + "test": "npm run clean && npm run build && JAIPH_UNSAFE=true NODE_OPTIONS='--max-old-space-size=32768 --enable-source-maps' node --test dist/test/*.test.js $(find dist/src -name '*.test.js' -o -name '*.acceptance.test.js') dist/src/compiler-test-runner.js dist/src/golden-ast-runner.js", "test:acceptance:compiler": "npm run build && node --test $(find dist/src -name '*.acceptance.test.js')", "test:acceptance:runtime": "bash ./e2e/test_all.sh", "test:acceptance": "npm run test:acceptance:compiler && npm run test:acceptance:runtime", diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts index e706697d..473a0803 100644 --- a/src/runtime/docker.test.ts +++ b/src/runtime/docker.test.ts @@ -139,9 +139,9 @@ test("parseMounts: throws when no workspace mount", () => { // resolveDockerConfig 
// --------------------------------------------------------------------------- -test("resolveDockerConfig: defaults when no in-file and no env", () => { +test("resolveDockerConfig: defaults when no in-file and no env — Docker on", () => { const cfg = resolveDockerConfig(undefined, {}); - assert.equal(cfg.enabled, false); + assert.equal(cfg.enabled, true); assert.ok(cfg.image.startsWith(GHCR_IMAGE_REPO + ":"), `default image should be GHCR: ${cfg.image}`); assert.equal(cfg.network, "default"); assert.equal(cfg.timeout, 300); @@ -183,6 +183,26 @@ test("resolveDockerConfig: env JAIPH_DOCKER_ENABLED=true overrides CI default", assert.equal(cfg.enabled, true); }); +test("resolveDockerConfig: JAIPH_UNSAFE=true disables Docker by default", () => { + const cfg = resolveDockerConfig(undefined, { JAIPH_UNSAFE: "true" }); + assert.equal(cfg.enabled, false); +}); + +test("resolveDockerConfig: JAIPH_UNSAFE=true with in-file override enables Docker", () => { + const cfg = resolveDockerConfig({ dockerEnabled: true }, { JAIPH_UNSAFE: "true" }); + assert.equal(cfg.enabled, true); +}); + +test("resolveDockerConfig: JAIPH_UNSAFE=true with env JAIPH_DOCKER_ENABLED=true enables Docker", () => { + const cfg = resolveDockerConfig(undefined, { JAIPH_UNSAFE: "true", JAIPH_DOCKER_ENABLED: "true" }); + assert.equal(cfg.enabled, true); +}); + +test("resolveDockerConfig: both CI and JAIPH_UNSAFE unset with explicit JAIPH_DOCKER_ENABLED=false disables", () => { + const cfg = resolveDockerConfig(undefined, { JAIPH_DOCKER_ENABLED: "false" }); + assert.equal(cfg.enabled, false); +}); + test("resolveDockerConfig: network env override", () => { const cfg = resolveDockerConfig(undefined, { JAIPH_DOCKER_NETWORK: "none" }); assert.equal(cfg.network, "none"); diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index 06f6df5d..32cce1c5 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -145,21 +145,27 @@ const DEFAULTS: DockerRunConfig = { /** * Resolve effective Docker config. 
- * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > defaults. - * Docker is disabled by default; opt in via config or env. + * Precedence: env vars (`JAIPH_DOCKER_*`) > in-file RuntimeConfig > CI/unsafe default rule. + * + * Default rule (when no explicit override is set): + * - `CI=true` or `JAIPH_UNSAFE=true` → Docker off + * - Otherwise → Docker on */ export function resolveDockerConfig( inFile: RuntimeConfig | undefined, env: Record<string, string | undefined>, ): DockerRunConfig { - // enabled: env > in-file > default (false) + // enabled: env JAIPH_DOCKER_ENABLED > in-file > CI/unsafe default rule let enabled: boolean; if (env.JAIPH_DOCKER_ENABLED !== undefined) { enabled = env.JAIPH_DOCKER_ENABLED === "true"; } else if (inFile?.dockerEnabled !== undefined) { enabled = inFile.dockerEnabled; } else { - enabled = DEFAULTS.enabled; + // Default: Docker on unless CI or unsafe mode is active + const isCI = env.CI === "true"; + const isUnsafe = env.JAIPH_UNSAFE === "true"; + enabled = !(isCI || isUnsafe); } // image: env > in-file > default From 6a0e25330d2186ae5b8a8ed34af03bd8a450385d Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Sat, 18 Apr 2026 09:30:50 +0200 Subject: [PATCH 09/38] Feat: Export workspace patch on Docker run teardown Docker runs enforce an immutability contract: the host workspace is bind-mounted read-only and /jaiph/workspace is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel to the host is the run-artifacts directory. During teardown, the runtime now automatically exports a workspace.patch file (git diff --binary) into the run directory so sandbox edits can be reviewed or applied on the host. Patch export is best-effort, owned by the runtime (not workflow logic), and runs regardless of workflow exit status. When there are no changes, the file is omitted. 
Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + QUEUE.md | 27 +++++----- docs/architecture.md | 1 + docs/artifacts.md | 2 + docs/sandboxing.md | 4 ++ src/runtime/docker.test.ts | 60 +++++++++++++++++++++ src/runtime/docker.ts | 38 +++++++++++++ src/runtime/kernel/node-workflow-runtime.ts | 9 ++++ 8 files changed, 128 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 052f85e9..ccd2e664 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`). - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. 
When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`). - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). 
Docs updated (`docs/sandboxing.md`). - **Feature — Language:** Optional module manifest keys (`module.name`, `module.version`, `module.description`) — The module-level `config { }` block now accepts three optional descriptive metadata keys: `module.name`, `module.version`, and `module.description`. All three are strings, all optional, and purely informational — they do not affect agent, run, or runtime behavior. Values are stored on `WorkflowMetadata.module` and round-trip through `jaiph format`. No semver validation is applied to `module.version`; any quoted string is accepted. Workflow-level `config` blocks reject `module.*` keys with `E_PARSE`, consistent with the existing `runtime.*` workflow guard. Future features (e.g. MCP tool metadata) may consume these fields. Implementation: `ALLOWED_KEYS` and `assignConfigKey` in `src/parse/metadata.ts`, `WorkflowMetadata.module` in `src/types.ts`, formatter round-trip in `src/format/emit.ts`, workflow-level rejection in `src/parse/workflows.ts`. Unit tests cover happy path, partial keys, coexistence with other config keys, formatter round-trip, and workflow-level rejection. Docs updated (`docs/configuration.md`, `docs/grammar.md`). diff --git a/QUEUE.md b/QUEUE.md index 102f91b6..231e8c35 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -12,46 +12,45 @@ Process rules: *** -## Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env (git patch saved to .jaiph/runs?) - -## Runtime — credential proxy for Docker mode +## Runtime — credential proxy for Docker mode #dev-ready **Goal** Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. The workload in the container never receives the real secret. **Design** -1. 
**Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration, stream the response back (including SSE). -2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=` and `ANTHROPIC_BASE_URL=http://host.docker.internal:` (or `http://:`). Never pass the real key in `-e`. +1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration (`ANTHROPIC_BASE_URL` when set, otherwise the Anthropic default), stream the response back (including SSE). +2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=` and `ANTHROPIC_BASE_URL=http://host.docker.internal:` (or `http://:`). Never pass the real key in `-e`. Use one **fixed placeholder string** (for example `__JAIPH_ANTHROPIC_KEY_PLACEHOLDER__`) defined in one place and shared by the proxy and Docker env wiring so tests stay deterministic. 3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container. -4. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. 
**Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there. -5. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred. -6. **Non-goals (v1)** — Rate limits and audit logging. -7. **Lifecycle** — Start the proxy before the first `spawnDockerProcess` for that Jaiph process; stop it when tearing down the Docker run (and on Jaiph exit), with reference counting if multiple Docker runs can occur in one process. +4. **Activation** — Start the proxy when Docker mode is active **and** the host has a real `ANTHROPIC_API_KEY` to protect (if unset, no proxy). Non-Docker runs unchanged. +5. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there. +6. **Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred. +7. **Non-goals (v1)** — Rate limits and audit logging. +8. **Lifecycle** — Start the proxy immediately before `spawnDockerProcess` when activation applies; stop it in `cleanupDocker` (or paired helper) when the Docker run tears down. The only current call site is `src/cli/commands/run.ts` (one Docker run per CLI process); structure so multiple spawns could refcount later if needed. **Context** * Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/). 
-* **Implementation touchpoints** — `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags), `src/cli/commands/run.ts` (spawn/cleanup lifecycle). Agent CLI args/env preparation: `src/runtime/kernel/prompt.ts` (likely unchanged). +* **Implementation touchpoints** — New small module for the HTTP proxy; `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags); `src/cli/commands/run.ts` if wiring cannot live entirely in `docker.ts`. `src/runtime/kernel/prompt.ts` likely unchanged. Expect `src/runtime/docker.test.ts` updates for new `-e` behavior. * Image template: `.jaiph/Dockerfile`. **Queue coordination** -* This edits the same `docker.ts` / Docker spawn path as the queued **Docker — strict image contract + GHCR** task—land together or immediately after to reduce merge churn. +* Coordinate merges with other Docker/runtime work in this queue (for example **Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env**) to limit churn on `docker.ts` / `run.ts`. * Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands. **Acceptance criteria** -* Host-side proxy starts automatically when Docker mode is active (Anthropic/Claude path). +* Host-side proxy starts automatically under the activation rule above. * Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment. * `claude` CLI calls from inside Docker succeed via the proxy. * Proxy handles streaming responses (SSE) correctly. * Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder). -* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). 
+* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). Update `docs/sandboxing.md` so the credential-forwarding section matches shipped behavior (replacing the prior “when the credential-proxy feature lands” wording). **Scope note** -* Target **\~3 files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`. Plain functions, no new abstraction layers. +* Target **~3 production files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`; tests updated alongside. Plain functions, no new abstraction layers. ## `jaiph serve` — expose workflows as an MCP server #dev-ready diff --git a/docs/architecture.md b/docs/architecture.md index 936e3022..b96d1bb9 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -57,6 +57,7 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`* - **Docker runtime helper (`src/runtime/docker.ts`)** - Parses mount specs, resolves Docker config (image, network, timeout), and builds the `docker run` invocation used by `jaiph run --docker`. The container runs the same `node-workflow-runner` process as local execution. The default image is the official `ghcr.io/jaiphlang/jaiph-runtime` GHCR image; every selected image must already contain `jaiph` (no auto-install or derived-image build at runtime). The spawn call uses `stdio: ["ignore", "pipe", "pipe"]` — stdin is ignored to prevent the Docker CLI from blocking on stdin EOF, which would stall event streaming and cause the host CLI to hang after the container exits. + - **Workspace immutability:** Docker runs cannot modify the host workspace. The host checkout is mounted read-only; `/jaiph/workspace` is a sandbox-local copy-on-write overlay discarded on exit. The only host-writable path is `/jaiph/run` (run artifacts). 
During teardown, `exportWorkspacePatch()` emits a `workspace.patch` file (best-effort `git diff --binary`) into the run directory so sandbox edits can be reviewed or applied on the host. See [Sandboxing](sandboxing.md) for the full contract. ## Runtime vs CLI responsibilities diff --git a/docs/artifacts.md b/docs/artifacts.md index be79f8ed..62abad47 100644 --- a/docs/artifacts.md +++ b/docs/artifacts.md @@ -22,6 +22,7 @@ The runtime uses a UTC-dated hierarchy. Each run gets its own folder: date, then inbox/ # inbox message files (when channels are used) .seq # step-sequence counter (kernel/seq-alloc.ts) run_summary.jsonl # durable event timeline + workspace.patch # (Docker only) git diff of sandbox workspace changes ``` Sequence prefixes are **monotonic and unique** per run (allocated in the kernel), so artifact names sort in execution order. For how this fits into the CLI and kernel, see [Architecture — Durable artifact layout](architecture.md#durable-artifact-layout). @@ -32,6 +33,7 @@ Sequence prefixes are **monotonic and unique** per run (allocated in the kernel) - **`run_summary.jsonl`** — Append-only JSONL timeline: workflow boundaries, step start/end, structured log lines, inbox-related events. Useful for tooling and post-run analysis. - **`inbox/`** — When you use channels, message payloads can be reflected as files under the run for inspection (see [Inbox & Dispatch](inbox.md)). - **`.seq`** — Internal counter backing the numeric prefixes; you normally do not edit it. +- **`workspace.patch`** — (Docker runs only) A `git diff --binary` patch capturing all workspace modifications made during the run. Generated automatically during runtime teardown when Docker sandboxing is enabled and the workspace has changes. The patch is sufficient to review or `git apply` on the host. Omitted when there are no workspace changes. See [Sandboxing — Workspace patch export](sandboxing.md#runtime-behavior). 
## Keeping runs out of git diff --git a/docs/sandboxing.md b/docs/sandboxing.md index 6445008e..620e3896 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -143,6 +143,10 @@ The working directory is `/jaiph/workspace`. The host CLI generates `overlay-run **Run artifacts** -- The host CLI mounts the resolved host runs root at `/jaiph/run:rw` inside the container. By default this is `.jaiph/runs` under the workspace; a relative `JAIPH_RUNS_DIR` is resolved under the workspace; an absolute `JAIPH_RUNS_DIR` must stay within the workspace or the run fails with `E_DOCKER_RUNS_DIR`. `JAIPH_RUNS_DIR` is set to `/jaiph/run` inside the container, so the runtime writes artifacts directly into the requested host path. +**Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. The host checkout is bind-mounted read-only; the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer (fuse-overlayfs or copy fallback) whose state is discarded on container exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract. + +**Workspace patch export** -- When a Docker-backed run modifies files under `/jaiph/workspace`, the runtime automatically exports a `workspace.patch` file into the run directory during teardown (`exportWorkspacePatch` in `docker.ts`, called from `NodeWorkflowRuntime`). The patch is generated with `git diff --binary` (after `git add -N .` for untracked files) and is sufficient to review or `git apply` on the host. Patch export is best-effort: it runs regardless of workflow exit status, and failures are reported on stderr without changing the workflow's reported status. When there are no workspace changes, the `workspace.patch` file is omitted (not created). The bundled `.jaiph/Dockerfile` image includes `git`. 
+ **Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). `"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment. **Timeout** -- When `runtime.docker_timeout` is greater than zero, the CLI sends `SIGTERM` to the container process on overrun, followed by `SIGKILL` after a 5-second grace period. The failure message includes `E_TIMEOUT container execution exceeded timeout`. diff --git a/src/runtime/docker.test.ts b/src/runtime/docker.test.ts index 473a0803..23dd7c06 100644 --- a/src/runtime/docker.test.ts +++ b/src/runtime/docker.test.ts @@ -18,6 +18,7 @@ import { isEnvDenied, ENV_DENYLIST_PREFIXES, GHCR_IMAGE_REPO, + exportWorkspacePatch, type MountSpec, type DockerRunConfig, type DockerSpawnOptions, @@ -696,3 +697,62 @@ test("buildDockerArgs: includes --cap-drop ALL and --security-opt no-new-privile assert.ok(secOptIdx >= 0, "--security-opt present"); assert.equal(args[secOptIdx + 1], "no-new-privileges"); }); + +// --------------------------------------------------------------------------- +// exportWorkspacePatch +// --------------------------------------------------------------------------- + +test("exportWorkspacePatch writes patch when git repo has changes", () => { + const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-")); + const patchOut = join(dir, "workspace.patch"); + try { + const { execSync } = require("node:child_process"); + execSync("git init", { cwd: dir, stdio: "ignore" }); + execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" }); + execSync("git config user.name test", { cwd: dir, stdio: "ignore" }); + // Create initial commit so diff has a baseline + writeFileSync(join(dir, "initial.txt"), "initial\n"); + execSync("git add . 
&& git commit -m init", { cwd: dir, stdio: "ignore" }); + // Make a change + writeFileSync(join(dir, "new-file.txt"), "hello\n"); + + const result = exportWorkspacePatch(dir, patchOut); + assert.equal(result, true, "should return true when patch is non-empty"); + assert.ok(existsSync(patchOut), "patch file should exist"); + const content = readFileSync(patchOut, "utf8"); + assert.ok(content.includes("new-file.txt"), "patch should reference the new file"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("exportWorkspacePatch returns false and omits file when no changes", () => { + const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-")); + const patchOut = join(dir, "workspace.patch"); + try { + const { execSync } = require("node:child_process"); + execSync("git init", { cwd: dir, stdio: "ignore" }); + execSync("git config user.email test@test.com", { cwd: dir, stdio: "ignore" }); + execSync("git config user.name test", { cwd: dir, stdio: "ignore" }); + writeFileSync(join(dir, "initial.txt"), "initial\n"); + execSync("git add . 
&& git commit -m init", { cwd: dir, stdio: "ignore" }); + + const result = exportWorkspacePatch(dir, patchOut); + assert.equal(result, false, "should return false when no changes"); + assert.ok(!existsSync(patchOut), "patch file should not exist"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("exportWorkspacePatch returns false for non-git directory", () => { + const dir = mkdtempSync(join(tmpdir(), "jaiph-patch-test-")); + const patchOut = join(dir, "workspace.patch"); + try { + const result = exportWorkspacePatch(dir, patchOut); + assert.equal(result, false, "should return false for non-git dir"); + assert.ok(!existsSync(patchOut), "patch file should not exist"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); diff --git a/src/runtime/docker.ts b/src/runtime/docker.ts index 32cce1c5..f0b7606d 100644 --- a/src/runtime/docker.ts +++ b/src/runtime/docker.ts @@ -624,6 +624,44 @@ export function cleanupDocker(result: DockerSpawnResult): void { } } +// --------------------------------------------------------------------------- +// Workspace patch export (Docker teardown) +// --------------------------------------------------------------------------- + +/** + * Export a git diff of workspace changes to a patch file. + * Used during Docker run teardown to capture sandbox-local modifications. + * + * Contract: + * - When there are changes, writes `workspace.patch` (git apply-able). + * - When there are no changes, the file is omitted (not created). + * - Best-effort: failures are reported on stderr but do not affect workflow exit status. + * + * @returns true if a non-empty patch was written. 
+ */ +export function exportWorkspacePatch(workspaceDir: string, outputPath: string): boolean { + try { + // Stage intent-to-add for untracked files so they appear in git diff + execSync("git add -N .", { cwd: workspaceDir, stdio: "ignore", timeout: 30_000 }); + } catch { + // Not a git repo or no new files — continue to diff + } + try { + const diff = execSync("git diff --binary", { + cwd: workspaceDir, + timeout: 60_000, + maxBuffer: 50 * 1024 * 1024, + }); + if (!diff || diff.length === 0) return false; + writeFileSync(outputPath, diff); + return true; + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`jaiph docker: workspace patch export failed: ${msg}\n`); + return false; + } +} + export function findRunArtifacts( sandboxRunDir: string, ): { runDir?: string; summaryFile?: string } { diff --git a/src/runtime/kernel/node-workflow-runtime.ts b/src/runtime/kernel/node-workflow-runtime.ts index 9678840c..2e02a87d 100644 --- a/src/runtime/kernel/node-workflow-runtime.ts +++ b/src/runtime/kernel/node-workflow-runtime.ts @@ -17,6 +17,7 @@ import { plainMultilineOrchestrationForRuntime, tripleQuotedRawForRuntime, } from "../orchestration-text"; +import { CONTAINER_WORKSPACE, exportWorkspacePatch } from "../docker"; const MAX_EMBED = 1024 * 1024; const MAX_RECURSION_DEPTH = 256; @@ -388,6 +389,13 @@ export class NodeWorkflowRuntime { } } + /** Best-effort: export workspace changes as a patch file for Docker runs. 
*/ + private exportPatchIfDocker(): void { + const ws = this.env.JAIPH_WORKSPACE; + if (ws !== CONTAINER_WORKSPACE) return; + exportWorkspacePatch(ws, join(this.runDir, "workspace.patch")); + } + async runDefault(args: string[]): Promise<number> { this.emitWorkflow("WORKFLOW_START", "default"); const rootScope: Scope = { @@ -411,6 +419,7 @@ export class NodeWorkflowRuntime { }); const result = await this.executeWorkflow(resolved.filePath, resolved.workflow.name, rootScope, args, false); this.emitWorkflow("WORKFLOW_END", "default"); + this.exportPatchIfDocker(); this.stopHeartbeat(); return result.status; } From 348e3e4da0c56d2a2b93b68c1b791ad82d813aa3 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Sun, 19 Apr 2026 14:01:08 +0200 Subject: [PATCH 10/38] Remove target design documentation file to streamline project focus and eliminate outdated content. Signed-off-by: Jakub Dzikowski --- QUEUE.md | 216 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 155 insertions(+), 61 deletions(-) diff --git a/QUEUE.md b/QUEUE.md index 231e8c35..678b851d 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -9,99 +9,193 @@ Process rules: 5. This queue assumes **hard rewrite semantics**: * breaking changes are allowed, * backward compatibility is **not** a design goal unless a task explicitly says otherwise. +6. **Acceptance criteria are non-negotiable.** A task is not done until every acceptance bullet is verified by a test that fails when the contract is violated. "It works on my machine" or "the existing tests pass" is not acceptance. *** -## Runtime — credential proxy for Docker mode #dev-ready +## Language/Runtime — add `recover` loop semantics for non-isolated `run` #dev-ready **Goal** -Containers should never hold real API keys. Implement a host-side HTTP proxy (the Phantom Token pattern) that intercepts outbound API requests from containers, strips a placeholder credential, and injects the real key from the host process environment before forwarding upstream. 
The workload in the container never receives the real secret. +Add `recover` as a first-class repair-and-retry primitive distinct from `catch`. Ship for non-isolated, non-async `run` first. Async composition lands in the next task, not here. -**Design** +**Scope** -1. **Host-side proxy** — A lightweight Node HTTP server bound to an address **reachable from the container network** (typically **`0.0.0.0:`** on the host; binding only `127.0.0.1` is often wrong for container-to-host access). For each request: replace placeholder auth with the real `ANTHROPIC_API_KEY` from the host, forward to the real Anthropic API base URL from host configuration (`ANTHROPIC_BASE_URL` when set, otherwise the Anthropic default), stream the response back (including SSE). -2. **Container env injection** — In `src/runtime/docker.ts` (`buildDockerArgs` / env passed into `-e`): pass `ANTHROPIC_API_KEY=` and `ANTHROPIC_BASE_URL=http://host.docker.internal:` (or `http://:`). Never pass the real key in `-e`. Use one **fixed placeholder string** (for example `__JAIPH_ANTHROPIC_KEY_PLACEHOLDER__`) defined in one place and shared by the proxy and Docker env wiring so tests stay deterministic. -3. **Linux networking** — When using the hostname `host.docker.internal`, add **`--add-host=host.docker.internal:host-gateway`** to the `docker run` argument list where supported so the name resolves inside the container. -4. **Activation** — Start the proxy when Docker mode is active **and** the host has a real `ANTHROPIC_API_KEY` to protect (if unset, no proxy). Non-Docker runs unchanged. -5. **Backends (v1 scope)** — **Claude / Anthropic only.** The Anthropic SDK and `claude` CLI honor `ANTHROPIC_BASE_URL`. **Cursor (`cursor-agent`)** does not have a documented equivalent to `ANTHROPIC_BASE_URL` in public Cursor CLI docs; **leave Cursor and codex (`OPENAI_*`) out of this task** and open a follow-up if the product needs the same guarantee there. -6. 
**Routing** — **Single listen port** and a single Anthropic-compatible upstream in v1. Multi-upstream path routing is deferred. -7. **Non-goals (v1)** — Rate limits and audit logging. -8. **Lifecycle** — Start the proxy immediately before `spawnDockerProcess` when activation applies; stop it in `cleanupDocker` (or paired helper) when the Docker run tears down. The only current call site is `src/cli/commands/run.ts` (one Docker run per CLI process); structure so multiple spawns could refcount later if needed. +* Keep existing `catch` behavior as one-attempt try/catch. +* Add: -**Context** + ```jh + run sth() recover(err) { + ... + } + ``` -* Pattern reference: [NanoClaw credential proxy](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/). -* **Implementation touchpoints** — New small module for the HTTP proxy; `src/runtime/docker.ts` (primary: `-e` forwarding, optional extra Docker flags); `src/cli/commands/run.ts` if wiring cannot live entirely in `docker.ts`. `src/runtime/kernel/prompt.ts` likely unchanged. Expect `src/runtime/docker.test.ts` updates for new `-e` behavior. -* Image template: `.jaiph/Dockerfile`. + with loop semantics: try, bind failure, run repair block, retry, stop on success or retry-limit exhaustion. +* Add a small explicit retry limit (default 10) with config override. +* Keep the runtime behavior simple and observable; do not introduce speculative control-flow abstractions. -**Queue coordination** +**Required tests** -* Coordinate merges with other Docker/runtime work in this queue (for example **Docker sandbox: Figure out a way to pass code changes from engineer.jh in docker mode to local env**) to limit churn on `docker.ts` / `run.ts`. -* Later **Runtime — harden Docker execution environment** may tighten env policy; document proxy-related variables when that work lands. +* Parser / formatter / validation coverage for `recover`. 
+* Runtime tests for: + - success on first attempt + - one or more repair loops before success + - retry limit exhaustion + - retry limit configured via `config` +* At least one acceptance test using `recover` to repair and retry a failing run. **Acceptance criteria** -* Host-side proxy starts automatically under the activation rule above. -* Containers receive only a placeholder `ANTHROPIC_API_KEY` — no real Anthropic API key in container environment. -* `claude` CLI calls from inside Docker succeed via the proxy. -* Proxy handles streaming responses (SSE) correctly. -* Real keys do not appear in Jaiph-supplied container `-e` values (so they do not appear in `docker inspect` for those vars or in container `printenv` for them as anything but the placeholder). -* macOS and Linux: documented/working host reachability (`host.docker.internal` + `host-gateway` on Linux as needed, or an equivalent bridge address). Update `docs/sandboxing.md` so the credential-forwarding section matches shipped behavior (replacing the prior “when the credential-proxy feature lands” wording). - -**Scope note** +* `recover` is distinct from `catch`. +* The retry limit is explicit and configurable. +* Tests prove loop behavior and limit handling. +* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`, the `STATEMENT_KEYWORDS` set and any keyword-flow special cases) recognizes `recover` as a keyword. Any `.jh` code block on the docs site that uses `recover` renders with the keyword colored. -* Target **~3 production files**: one small new module for the proxy plus focused edits in `docker.ts` and `run.ts`; tests updated alongside. Plain functions, no new abstraction layers. +*** -## `jaiph serve` — expose workflows as an MCP server #dev-ready +## Runtime — spec and implement `Handle` for `run async`, including `recover` composition #dev-ready **Goal** -Add a `jaiph serve ` command that starts a stdio MCP server. Each top-level workflow in the file becomes a callable MCP tool. 
This lets any MCP client (Cursor, Claude Desktop, custom agents) invoke Jaiph workflows directly. +Replace the current implicit end-of-workflow join with a value-based handle model. `run async foo()` returns a `Handle<T>` immediately. The handle resolves on first non-passthrough read. Workflow exit implicitly joins remaining unresolved handles. Ship `recover` composition for `run async` in the same task. -**Context** +This task ships **both the written spec and the runtime implementation in one go.** The previous attempt split them across two tasks and the spec drifted from the implementation. Keep them together so the contract and the code land in the same review. -* MCP (Model Context Protocol) uses JSON-RPC 2.0 over stdio. A server must handle `initialize`, `tools/list`, and `tools/call`. -* Jaiph already has a runtime (`src/runtime/kernel/node-workflow-runtime.ts`) that can execute workflows and capture output. -* The `@modelcontextprotocol/sdk` npm package provides a Node.js server implementation, but the protocol is simple enough to implement directly (\~200 lines for stdio JSON-RPC + the three methods). +**Scope** -**Phase 1 — single text input (this task)** +* Write the spec section in `docs/spec-async-handles.md` (a new file) covering: + - `Handle<T>` value model: a handle resolves to whatever the called function returned. First non-passthrough read forces resolution. Passthrough (assignment, storage, passing through arguments and returns unchanged) does not. + - Workflow exit implicitly joins any remaining unresolved handles; this is not an error. + - No fire-and-forget mode. + - `recover` composition: `b1 = run async foo() recover(err) { ... }` — handle resolves to either the eventual success value (after the retry loop runs) or the final failure. Same retry-limit semantics as the non-async `recover` task. +* Replace the implicit end-of-workflow join in `src/runtime/kernel/node-workflow-runtime.ts` with the value-based handle model.
+* `run async ...` returns a `Handle<T>` value. `T` is the same return type the function would have under a non-async `run`. +* Reads that force resolution: passing as an argument to `run`, string interpolation, comparison, conditional branching, any other access to the underlying value. +* Passthrough (assignment, storing in a list, passing through `workflow` arguments and returns unchanged) does not force resolution. +* Workflow exit implicitly joins unresolved handles. This preserves today's end-of-workflow behavior at the boundary. +* Parser must accept `recover(err) { ... }` after `run async ref(args)`. The previous attempt had the parser silently reject this with a "trailing content" error — that is the failure mode to fix. +* Preserve async progress/event visibility unless the contract forces an intentional change. +* Update docs that still describe the old statement-based async model. + +**Required tests** + +* Parser / formatter / validation coverage for `run async ref(args) recover(err) { ... }`. +* Runtime tests for handle creation, transparent resolution at first read, and resolution forced by passing a handle into another `run`. +* Runtime test for the multi-handle join shape: multiple async handles passed into another call all resolve before the callee runs. +* Runtime test that workflow exit joins unresolved handles without raising an error. +* Runtime test that handles can be stored in a list and resolved when read. +* Runtime test for `run async foo() recover(err) { ... }`: handle resolves to the success value after at least one repair loop. +* Runtime test that the retry-limit semantics are shared with the non-async `recover` task. -Each workflow becomes a tool with a single `input` string parameter: +**Acceptance criteria** + +* `run async ...` returns a first-class handle value. +* Handle reads force resolution per the spec. +* Workflow exit implicitly joins remaining handles (no error). +* `recover` works on `run async ref()`.
The parser accepts the form; the runtime implements the spec contract. +* Spec and implementation ship in the same change set; the spec is internally consistent and self-contained. +* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`) recognizes `async` as a keyword (modifier on `run`) and continues to highlight `recover` correctly when it appears as `recover(err) { ... }` after `run async ref(args)`. A docs code block with `b1 = run async foo() recover(err) { ... }` renders with `run`, `async`, and `recover` all colored. + +*** -```json -{ - "name": "analyze_gaps", - "description": "workflow analyze_gaps from qa.jh", - "inputSchema": { - "type": "object", - "properties": { - "input": { "type": "string", "description": "Text input passed to the workflow" } - } +## Artifacts — runtime mount + `artifacts.jh` lib for publishing files out of the sandbox #dev-ready + +**Goal** +Give workflows a clean, versatile way to publish files from inside the whole-program Docker sandbox to a host-readable location. Split the work across two layers: + +* **Runtime layer** (in `src/runtime/`): expose a writable artifacts directory inside the sandbox at a stable path, mapped to `.jaiph/runs//artifacts/` on the host. No new language primitive; the runtime's only job is to mount and to surface the path via env var. +* **Library layer** (in `.jaiph/libs/jaiphlang/`): ship a new `artifacts.jh` lib (mirroring the existing `queue.jh` / `queue.py` pair) with `export workflow` entries for the common operations. Userspace imports the lib explicitly: + + ```jh + import "jaiphlang/artifacts.jh" as artifacts + + workflow default() { + run artifacts.save("./build/output.bin", "build-output.bin") + run artifacts.save_patch("snapshot.patch") } -} -``` + ``` + +This keeps the runtime minimal (just a mount), makes the surface library-shaped (so it's discoverable and replaceable), and matches the established `queue.jh` pattern. 
+ +**Context (read before starting)** + +* Today's whole-program Docker sandbox in `src/runtime/docker.ts` already mounts the run directory writable at `/jaiph/run`. Artifacts will live in a subdirectory of that mount; no new mount is needed. +* The existing lib pattern is `.jaiph/libs/jaiphlang/queue.jh` paired with `.jaiph/libs/jaiphlang/queue.py` (a small Python helper invoked via `import script ... as queue`). Follow that pattern. +* The `isolated` keyword is not part of this codebase. This task is about the whole-program Docker sandbox only; no per-call isolation primitive exists or is to be introduced. -The `input` value is injected into the workflow environment as `JAIPH_MCP_INPUT` (accessible via `${input}` interpolation or `$JAIPH_MCP_INPUT` in scripts). The tool response is the workflow's captured output (log messages + prompt results). +**Scope** + +**Runtime layer:** + +* Ensure `.jaiph/runs/<run_id>/artifacts/` exists on the host before the sandbox starts (`mkdirSync` with `recursive: true`). +* The existing `/jaiph/run` mount in the container already exposes the artifacts subdirectory implicitly. Verify it does, and that writes inside the container land at `.jaiph/runs/<run_id>/artifacts/` on the host. +* Surface the in-container artifacts path to userspace via an env var. Suggested name: `JAIPH_ARTIFACTS_DIR` (defaulting to `/jaiph/run/artifacts` in the container, `.jaiph/runs/<run_id>/artifacts` on the host when running without the sandbox). The library reads this env var rather than hardcoding the path. +* When running on the host (no sandbox), `JAIPH_ARTIFACTS_DIR` points at the host artifacts directory directly so the same lib works. + +**Library layer:** + +* Add `.jaiph/libs/jaiphlang/artifacts.jh` and `.jaiph/libs/jaiphlang/artifacts.py` (or `.sh` if it stays a one-liner). Mirror the `queue.jh` / `queue.py` shape exactly — no novel patterns.
+* Provide these `export workflow` entries: + - `save(local_path, name)` — copies the file at `local_path` into `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path as a string. + - `save_patch(name)` — runs `git diff` (working tree vs HEAD) inside the sandbox workspace, writes it to `${JAIPH_ARTIFACTS_DIR}/${name}`. Returns the host-resolved absolute path. + - `apply_patch(path)` — applies a patch file to the current workspace via `git apply`. Useful for replaying artifacts across runs. +* The lib must work both inside the sandbox and on the host (when the user runs `jaiph` without the Docker sandbox). The only difference is what `JAIPH_ARTIFACTS_DIR` resolves to. +* Document that `save_patch` excludes `.jaiph/` from the produced patch (the runtime writes its own state under `.jaiph/`; including it in a patch would clobber state on apply). The exclusion lives in the lib's helper script, not in the runtime, and is documented inline next to the implementation. -**Phase 2 — typed parameters (future task)** +**Required tests** -Extend the language with workflow parameters: `workflow analyze(file: string, depth: number) { ... }`. These map directly to the tool's `inputSchema`. Not in scope for this task. +* **Runtime tests**: + - `JAIPH_ARTIFACTS_DIR` is set inside the sandbox and points at a writable directory. + - `JAIPH_ARTIFACTS_DIR` is set when running on the host (no sandbox) and points at `.jaiph/runs/<run_id>/artifacts/`. + - The artifacts directory exists before the sandbox starts (no race where the lib tries to write before the dir exists). +* **Library tests**: + - `artifacts.save(local_path, name)`: file is created at the host path; return value matches that path; file content equals the source. + - `artifacts.save_patch(name)`: produces a non-empty patch when the workspace has uncommitted changes; produces an empty (or absent) patch when the workspace is clean; the patch does not reference `.jaiph/` even when `.jaiph/` files have changed.
+ - `artifacts.apply_patch(path)`: applies a previously-saved patch cleanly; fails with a clear error when the patch does not apply. +* **End-to-end**: + - One `.jh` example workflow that imports `jaiphlang/artifacts.jh`, calls `artifacts.save` and `artifacts.save_patch`, runs under the sandbox, and the test asserts both files appear on the host at the expected paths. + +**Acceptance criteria** + +* `.jaiph/runs//artifacts/` exists, is writable from inside the sandbox, and survives sandbox teardown (it's on the host filesystem via the existing mount). +* `JAIPH_ARTIFACTS_DIR` is exposed in both sandbox and host execution; the lib reads it rather than hardcoding paths. +* `.jaiph/libs/jaiphlang/artifacts.jh` ships with `save`, `save_patch`, `apply_patch` as `export workflow` entries, mirroring the `queue.jh` lib shape. +* The lib works identically inside the sandbox and on the host. +* `save_patch`'s `.jaiph/` exclusion is documented inline in the helper script. +* No new runtime language primitive is introduced. The user-facing surface is `import` + workflow calls. +* The docs-site documentation is updated to describe the artifacts lib alongside the queue lib (`docs/libraries.md` or equivalent). + +*** + +## Runtime — PTY-based TTY test for `run async` #dev-ready + +**Goal** +Live progress for `run async` (with handles, deferred resolution, multi-branch fan-out without isolation) takes a different render path than synchronous steps. Close the regression-coverage gap by exercising that path through a real PTY. + +**Context (read before starting)** + +`e2e/tests/81_tty_progress_tree.sh` already uses Python's `pty.openpty()` to drive `jaiph run` under a real TTY and asserts on the rendered progress frames. It covers non-async workflows. There is no equivalent for `run async`. 
The host progress renderer takes a different path for async (handles, deferred resolution, multiple in-flight calls competing for the live frame), and that path has been broken before without any test catching it. **Scope** -1. **CLI command** (`src/cli/commands/serve.ts`): add `jaiph serve ` that parses the file, starts a stdio JSON-RPC server, and handles `initialize`, `tools/list`, `tools/call`. -2. **Tool listing**: read the parsed module's `workflows` array. Each workflow becomes a tool entry with `name` \= workflow name, `description` \= `"workflow from "`, `inputSchema` \= single `input` string. -3. **Tool execution**: on `tools/call`, run the named workflow using the existing runtime. Capture all output (logs, prompt results). Return as `content: [{ type: "text", text: output }]`. -4. **Error handling**: if the workflow fails, return `isError: true` with the error message. -5. **Config inheritance**: the `.jh` file's `config { ... }` block applies normally (backend, model, etc.). -6. **E2E test**: a test that starts `jaiph serve` with a simple workflow, sends JSON-RPC messages via stdin, and verifies the tool list and a tool call response. -7. **Docs**: add a section to `docs/index.html` and `docs/jaiph-skill.md` about MCP server mode. +* Add an e2e test (sibling of `e2e/tests/81_tty_progress_tree.sh`) that: + * spawns `jaiph run` under a real PTY, + * exercises a workflow that uses `run async branch()` with at least two concurrent async calls, + * each branch emits multiple progress events over time (use a deterministic step like a sleep loop with `print` calls — do not depend on `prompt claude` or any other non-deterministic step), + * captures the PTY output and asserts: + 1. each branch's progress events appear under that branch's node in the tree as they happen, + 2. the final frame shows both branches as completed with their resolved return values, + 3. no ANSI corruption (orphaned escape sequences, stray cursor moves outside the rendered region). 
+* The test must fail today against any regression that batches async progress events at branch completion, drops them, or scrambles the frame. + +**Non-goals** + +* Do not test `prompt claude` or any non-deterministic step. Branches must emit synthetic, time-spaced events. +* Do not assert on exact frame timing; assert on order and presence within a generous timeout. +* No `isolated` variant — that keyword is not part of this codebase. **Acceptance criteria** -* `jaiph serve examples/greeting.jh` starts a stdio MCP server. -* `tools/list` returns one tool per workflow. -* `tools/call` executes the workflow and returns its output. -* Errors produce `isError: true` responses (no server crash). -* E2E test passes. +* New test lives next to `e2e/tests/81_tty_progress_tree.sh` and follows the same shell-driving-Python-PTY pattern. +* The test passes on a green build and fails when the live-progress path for `run async` regresses. +* Test runs as part of the standard e2e suite (no separate invocation). *** From 58bbcca54e95974f48a3d8f393f7a3bc6e3e7ef7 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Mon, 20 Apr 2026 13:57:20 +0200 Subject: [PATCH 11/38] Queue: Add cleanup tasks Signed-off-by: Jakub Dzikowski --- QUEUE.md | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/QUEUE.md b/QUEUE.md index 678b851d..68afd335 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -199,3 +199,204 @@ Live progress for `run async` (with handles, deferred resolution, multi-branch f * Test runs as part of the standard e2e suite (no separate invocation). 
*** + +## Cleanup — delete top-level debug cruft and harden `.gitignore` #dev-ready + +**Goal** +The repo root contains 22+ leftover debug directories from an abandoned per-call isolated experiment (`docker-nested-arg.*`, `docker-nested-clean.*`, `overlay-warn.*`, `nested-run-arg.*`, `local-nested-arg.*`, `overlay-manual.*`, `docker-live-debug.*`), plus stale `.tmp`, `.tmp-build`, `.tmp-debug`, `.tmp_run_debug`, `QUEUE.md.tmp.4951`, `safe_name`, top-level `lib/`, top-level `run/`. None are in `.gitignore`. Fix that, in one pass, so the workspace is readable at a glance and these don't return. + +**Scope** + +* Delete every leftover debug directory at the repo root matching `docker-*/`, `nested-*/`, `overlay-*/`, `local-*/`, `.tmp*/`. Verify with `git ls-files ` first that they are not tracked (they should not be). +* Investigate three suspicious top-level paths: `safe_name`, `lib/`, `run/`. The default disposition is **delete**. Only keep one if you can identify a live consumer in the source tree (search with `rg`/`grep` for the path string). If a consumer exists, document it inline next to the deletion decision. +* Delete tracked cruft files: `safe_name` and `QUEUE.md.tmp.4951`. Verify they are tracked first (`git ls-files`); use `git rm` rather than `rm` for tracked paths. +* Add patterns to `.gitignore` so they cannot return without a deliberate override: + - `docker-*/` + - `nested-*/` + - `overlay-*/` + - `local-*/` + - `.tmp*/` + - `QUEUE.md.tmp.*` +* Sanity-check: after the cleanup, `ls` at the repo root should show only documented project directories. No `.cidfile`, no `.pid`, no random temp dir names. + +**Non-goals** + +* Do not touch `.jaiph/runs/`, `dist/`, `node_modules/` — already in `.gitignore` and load-bearing. +* Do not delete the `docker/` directory (singular, no suffix) — that is a different, intentional location. +* No code changes; this task is filesystem hygiene only. 
+ +**Acceptance criteria** + +* Repo root listing contains zero `docker-*`, `nested-*`, `overlay-*`, `local-*`, or `.tmp*` directories after the change. +* `.gitignore` contains the patterns listed above; `git status` is clean immediately after deletion. +* Disposition of `safe_name`, `lib/`, `run/` is recorded in the commit message (deleted, kept-and-why). +* A second `npm run build && npm test` after the cleanup passes (proves nothing important was removed). + +*** + +## Cleanup — remove dead per-call-isolated leftovers from `src/runtime/docker.ts` #dev-ready + +**Goal** +`src/runtime/docker.ts` (688 LoC) still exports two functions written exclusively for the now-abandoned per-call `isolated` keyword: `exportWorkspacePatch`, `findRunArtifacts`, plus the helper `exportPatchIfDocker` in `src/runtime/kernel/node-workflow-runtime.ts`. These have one or two live callers each, all of which are themselves transitional code from the same abandoned design. Once the new `artifacts.jh` lib has landed (it replaces the use case end-to-end), these can go. Net reduction: ~200 LoC of source + ~150 LoC of dead tests in `src/runtime/docker.test.ts`. + +**Context (read before starting)** + +* `exportWorkspacePatch(workspaceDir, outputPath)` writes a `git diff` patch when running inside the Docker sandbox. Single live caller: `NodeWorkflowRuntime.exportPatchIfDocker()` (in `src/runtime/kernel/node-workflow-runtime.ts`), which writes `/workspace.patch` at workflow end. The new `artifacts.save_patch()` workflow in `.jaiph/libs/jaiphlang/artifacts.jh` (shipped by the artifacts task) replaces this use case explicitly: callers who want a patch ask for one by name, with the path returned to them. +* `findRunArtifacts(sandboxRunDir)` discovers the latest run dir under a Docker-mounted artifacts area. Single live caller: `src/cli/commands/run.ts:367` — the host reads it after the sandbox exits to surface the inner run's artifacts.
With the artifacts task's explicit `JAIPH_ARTIFACTS_DIR` mount and known path, this discovery is no longer needed: the host already knows where to look. +* The `isolated` keyword is not part of this codebase. There is no per-call isolation primitive to keep these helpers alive for. + +**Scope** + +* **Precondition check**: before deleting, run `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` and verify the only callers are the ones listed above. If any new caller has appeared, evaluate it on the spot — either it is also dead and can go in this task, or removal is blocked and you stop and report. +* **Precondition check**: confirm the artifacts task has shipped (look for `.jaiph/libs/jaiphlang/artifacts.jh` and a working `artifacts.save_patch`). If it has not, this task is not ready — do not attempt half-removal that breaks the runtime. +* Remove from `src/runtime/docker.ts`: + - `exportWorkspacePatch` (function + export) + - `findRunArtifacts` (function + export) +* Remove from `src/runtime/kernel/node-workflow-runtime.ts`: + - `exportPatchIfDocker` (private method) + - The import of `exportWorkspacePatch` from `../docker` + - Any call site of `exportPatchIfDocker` (verify zero remain after the method is gone) +* Remove from `src/cli/commands/run.ts`: + - The `findRunArtifacts(sandboxRunDir)` call at line ~367 + - The import of `findRunArtifacts` + - Any code that consumes the result of `findRunArtifacts` and is now dead (chase the value, do not leave dangling variables) +* Remove from `src/runtime/docker.test.ts`: + - All `findRunArtifacts: ...` test cases + - All `exportWorkspacePatch: ...` test cases + - The shared test fixtures used only by those tests + +**Non-goals** + +* Do not touch `writeOverlayScript`, `overlayMountPath`, `buildDockerArgs`, or other docker.ts functions — those remain load-bearing for the whole-program Docker sandbox. 
+* Do not modify the artifacts lib or its runtime mount; this task only removes the predecessor primitives. +* Do not collapse env vars or config keys — that is a separate concern explicitly out of scope. + +**Acceptance criteria** + +* `rg 'exportWorkspacePatch|findRunArtifacts|exportPatchIfDocker' src/` returns zero matches. +* `npm run build` succeeds with no TypeScript errors after removal. +* `npm test` passes (proves no remaining test depends on the deleted primitives). +* Net diff: ~200 LoC removed from `src/runtime/docker.ts` and `src/runtime/kernel/node-workflow-runtime.ts`, ~150 LoC of dead tests removed from `src/runtime/docker.test.ts`. If your diff is materially smaller, you missed something; if materially larger, you are deleting more than the task scope — stop and reassess. + +*** + +## Cleanup — consolidate the 5-way test directory split #dev-ready + +**Goal** +Today there are five different places that contain "tests": `src/**/*.test.ts` (66 unit tests, adjacent to source), `test/` (4 integration files including a 2427-LoC `sample-build.test.ts`), `tests/e2e-samples/` (a single Playwright file), `compiler-tests/` (txtar fixtures), `golden-ast/` (fixtures + expected). Plus runners `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` mixed into the production source tree. A new contributor cannot tell where a new test belongs without reading the whole layout. Fix the structure in one pass. + +**Context (read before starting)** + +* The current `package.json` `test` script enumerates the test sources explicitly; this gives us a precise inventory of what is wired in: + ``` + dist/test/*.test.js + dist/src/**/*.test.js + dist/src/**/*.acceptance.test.js + dist/src/compiler-test-runner.js + dist/src/golden-ast-runner.js + ``` + Any move must update this script and keep the same test set running. Adding tests is out of scope; this is purely reorganization. 
+* `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` are compiled and shipped in `dist/`, but they are test infrastructure (they consume fixtures, produce assertions). They should not live in `src/`. +* `compiler-tests/README.md` already documents the txtar format — preserve that doc next to the fixtures it describes. + +**Scope** + +* **Move test infrastructure out of `src/`**: + - `src/compiler-test-runner.ts` → `test-infra/compiler-test-runner.ts` + - `src/golden-ast-runner.ts` → `test-infra/golden-ast-runner.ts` + - `tsconfig.json` and `package.json` `test` script updated to reference the new locations. +* **Rename and group fixture directories**: + - `compiler-tests/` → `test-fixtures/compiler-txtar/` (preserves the README inside). + - `golden-ast/` → `test-fixtures/golden-ast/` (preserves the `fixtures/` and `expected/` subdirs underneath). + - Update path references in `test-infra/compiler-test-runner.ts` and `test-infra/golden-ast-runner.ts`. +* **Fold the singleton Playwright test**: + - `tests/e2e-samples/landing-page.spec.ts` → `e2e/playwright/landing-page.spec.ts`. + - Update `playwright.config.ts` and the `test:samples` npm script accordingly. + - Delete the now-empty `tests/` directory. +* **Triage `test/` (4 files, 2960 LoC)**: + - `test/run-summary-jsonl.test.ts` (178 LoC), `test/signal-lifecycle.test.ts` (220 LoC), `test/tty-running-timer.test.ts` (135 LoC) — keep in a renamed `integration/` directory. They are integration-flavored, not unit, and don't have an obvious adjacent home. + - `test/sample-build.test.ts` (2427 LoC) — split. Read the file, group its tests by which subsystem they actually exercise, and move each group either next to that subsystem (`src/.../.integration.test.ts`) or into `integration/sample-build/.test.ts`. Aim for no resulting file over ~600 LoC. The split is the work; it is not optional. 
+ - Move `test/expected/` and `test/fixtures/` to `test-fixtures/sample-build/` if any test still references them after the split. +* **Final layout** (target): + ``` + src/**/*.test.ts # unit, adjacent (unchanged) + src/**/*.acceptance.test.ts # acceptance, adjacent (unchanged) + integration/**/*.test.ts # integration tests (was `test/`, after split) + test-fixtures/compiler-txtar/ # was `compiler-tests/` + test-fixtures/golden-ast/ # was `golden-ast/` + test-fixtures/sample-build/ # if any sample-build fixtures survive the split + test-infra/compiler-test-runner.ts # was `src/compiler-test-runner.ts` + test-infra/golden-ast-runner.ts # was `src/golden-ast-runner.ts` + e2e/ # shell + .jh (unchanged) + e2e/playwright/landing-page.spec.ts # was `tests/e2e-samples/` + ``` + Three test "places" instead of five (`src/`-adjacent, `integration/`, `e2e/`); plus two clearly named support directories (`test-fixtures/`, `test-infra/`). +* Update `package.json` `test`, `test:compiler`, `test:golden-ast`, `test:samples`, `test:acceptance`, `test:ci`, `test:e2e` scripts to reference the new paths. Verify by running `npm test` end-to-end. + +**Non-goals** + +* Do not change any test's logic, assertions, or fixtures' contents. The goal is layout, not behavior. +* Do not change the unit-tests-adjacent-to-source convention. That part works. +* Do not delete any test (other than ones absorbed into the `sample-build.test.ts` split, where the original file goes away after redistribution). + +**Acceptance criteria** + +* `npm test` passes with the same test count (or higher, if the `sample-build` split surfaces previously-bundled cases as separate tests). Test count must not decrease. +* No file in `src/` is named `*-test-runner.ts`. Test infrastructure lives only in `test-infra/`. +* No file under `integration/` exceeds ~600 LoC after the `sample-build` split. +* The repo root no longer has both `test/` and `tests/`. (`tests/` is deleted after folding.) 
+* `package.json` test scripts reference the new paths and the same test set runs in CI. +* Commit message documents the file-move map (old → new) so reviewers can sanity-check that nothing was lost. + +*** + +## Refactor — split `src/runtime/kernel/node-workflow-runtime.ts` (1720 LoC) #dev-ready + +**Goal** +`src/runtime/kernel/node-workflow-runtime.ts` is a 1720-LoC god file: ~280 LoC of free arg-parsing helpers above the class, then a 1440-LoC `NodeWorkflowRuntime` class with 25 methods spanning workflow orchestration, step execution, prompt step lifecycle, event emission, mock execution, frame stack management, and heartbeat I/O. Reading or modifying any one concern requires holding all of them in head. Split along clean seams so each concern is in a focused module. + +**Context (read before starting)** + +* This file is actively touched by the `Handle` task. If that task is in flight, **rebase on it before splitting** — do not do this work in parallel without coordinating, or the merge will be miserable. +* The class has stateful internals (`runId`, `runDir`, `summaryFile`, `heartbeatTimer`, `frameStack`, `asyncIndices`, `env`, `cwd`, `graph`, `mockBodies`). The split must keep state in the class and move stateless helpers out, or pass state explicitly into the extracted modules. Do not invent a second source of truth. +* Free helpers above the class (`interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH`, `sanitizeName`, `nowIso`) — all stateless. Safe to extract. +* Methods that are pure event emission (`emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`) all call `appendRunSummaryLine` and `process.stderr.write`. They depend on the class only for `runId`, `summaryFile`, and `getAsyncIndices()`. 
Can move to a module that takes those as constructor args. +* Mock execution methods (`executeMockBodyDef`, `executeMockShellBody`) are largely self-contained and could move to a sibling module. + +**Scope** + +Extract three new sibling modules under `src/runtime/kernel/`: + +* **`runtime-arg-parser.ts`** — every stateless free helper currently above the `NodeWorkflowRuntime` class: + - `interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `sanitizeName`, `nowIso` + - The `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH` constants + - The `ParsedArgToken`, `PromptSchemaField` types if they are not used elsewhere in the class + - **Required**: extracted helpers must have unit tests (some already do indirectly via runtime tests; new direct tests live in `runtime-arg-parser.test.ts`). +* **`runtime-event-emitter.ts`** — a small class `RuntimeEventEmitter` constructed with `{ runId, asyncIndicesGetter, env }`, exposing `emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`. The runtime constructs one and delegates. No more direct `process.stderr.write(__JAIPH_EVENT__ ...)` scattered through the runtime. +* **`runtime-mock.ts`** — `executeMockBodyDef` and `executeMockShellBody` move here as exported functions taking `{ ref, args, env, cwd, executeStepsBack }` (the last is a callback so the mock can dispatch back into the runtime for `kind: "steps"` mocks). Removes the `require("node:child_process")` and `require("node:fs")` calls that currently shadow ESM imports inside the class body — that is a code smell that should die in this task. 
+ +After the split, `node-workflow-runtime.ts` keeps only: +* The `NodeWorkflowRuntime` class +* Workflow/step orchestration (`runDefault`, `runNamedWorkflow`, `executeSteps`, `executeStep`, frame and scope management) +* The async-handle bookkeeping (`getAsyncIndices`, `getFrameStack`) +* Heartbeat (`startHeartbeat`, `stopHeartbeat`, `writeHeartbeat`) + +Target size for `node-workflow-runtime.ts` after split: ~900–1100 LoC. Still large, but a single coherent concern (the orchestrator). + +**Non-goals** + +* Do not change behavior. Every existing test must still pass without modification. +* Do not redesign the event format, the mock contract, or the arg-parser's accepted syntax. This is a relocation task only. +* Do not split further than the three new modules listed. Over-decomposition is its own problem; this task is calibrated for one round of splitting. +* Do not touch `node-workflow-runner.ts` (the CLI shim) or `run-step-exec.ts` (subprocess plumbing) — those are already correctly sized and out of scope. + +**Acceptance criteria** + +* `src/runtime/kernel/node-workflow-runtime.ts` is between 900 and 1100 LoC after the split. +* `src/runtime/kernel/runtime-arg-parser.ts`, `runtime-event-emitter.ts`, `runtime-mock.ts` exist and own their respective concerns. +* `runtime-arg-parser.test.ts` exists with direct unit tests for the extracted helpers. +* `npm test` passes with no test changes other than possibly importing helpers from their new location. +* No `require("node:...")` calls inside class methods (they are replaced by top-of-file `import` statements as part of the mock extraction). +* The new modules have no circular imports back into `node-workflow-runtime.ts`. Dependency direction is one-way: orchestrator → helpers/emitter/mock. 
+ +*** From f055601125cf62668adeaa82fe4898751b82cd30 Mon Sep 17 00:00:00 2001 From: Jakub Dzikowski Date: Mon, 20 Apr 2026 14:26:55 +0200 Subject: [PATCH 12/38] Feat: Add recover loop semantics for non-isolated run Introduce `recover` as a first-class repair-and-retry primitive for `run` steps, distinct from the existing one-shot `catch`. When a run step fails, the recover block binds the error, executes a repair body, and retries the step in a loop until it succeeds or the retry limit (default 10, configurable via `config`) is exhausted. Covers parser, formatter, validator, runtime, e2e acceptance test, and docs-site syntax highlighting for the new keyword. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + QUEUE.md | 39 ----- docs/assets/js/main.js | 1 + docs/configuration.md | 2 + docs/grammar.md | 52 ++++++- docs/index.html | 86 ++++++----- docs/jaiph-skill.md | 14 +- docs/language.md | 47 +++++- e2e/test_all.sh | 1 + e2e/tests/130_run_recover_loop.sh | 113 ++++++++++++++ src/cli/run/progress.ts | 7 +- src/format/emit.test.ts | 37 +++++ src/format/emit.ts | 18 ++- src/parse/metadata.ts | 7 + src/parse/parse-steps.test.ts | 117 ++++++++++++++- src/parse/steps.ts | 157 ++++++++++++++++++++ src/parse/workflow-brace.ts | 7 +- src/parse/workflows.ts | 9 +- src/runtime/kernel/node-workflow-runtime.ts | 27 ++++ src/transpile/validate.ts | 12 ++ src/types.ts | 6 +- test/sample-build.test.ts | 154 +++++++++++++++++++ 22 files changed, 822 insertions(+), 92 deletions(-) create mode 100755 e2e/tests/130_run_recover_loop.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index ccd2e664..f6bb9448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## All changes +- **Feature — Language/Runtime:** `recover` loop semantics for `run` steps — `recover` is a new first-class repair-and-retry primitive for `run` steps, distinct from `catch`. Syntax: `run ref() recover(err) { … }`. 
On failure, the binding receives merged stdout+stderr, the repair body executes, and the target is retried automatically. The loop stops when the target succeeds or the retry limit is exhausted. The default retry limit is 10; override per-module with `run.recover_limit` in a `config` block. `catch` remains unchanged (one-shot recovery). `recover` and `catch` are mutually exclusive on the same step. Supported for non-isolated, non-async `run` in workflows only. The docs-site syntax highlighter (`docs/assets/js/main.js`) recognizes `recover` as a keyword. Implementation: `recoverLoop` field on `WorkflowStepDef` in `src/types.ts`, `parseRunRecoverStep` in `src/parse/steps.ts`, retry loop in `NodeWorkflowRuntime`, `run.recover_limit` config key in `src/parse/metadata.ts`, formatter round-trip in `src/format/emit.ts`, validation in `src/transpile/validate.ts`. Parser, formatter, validation, runtime, and E2E tests added. Docs updated (`docs/language.md`, `docs/grammar.md`, `docs/configuration.md`, `docs/jaiph-skill.md`, `docs/index.html`). - **Feature — Docker:** Workspace immutability contract and patch export — Docker runs now enforce an explicit immutability contract: the host workspace is bind-mounted read-only and the writable `/jaiph/workspace` inside the container is a sandbox-local copy-on-write layer discarded on exit. The only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). During teardown, the runtime automatically exports a `workspace.patch` file (best-effort `git diff --binary` after `git add -N .`) into the run directory so sandbox edits can be reviewed or applied on the host with `git apply`. Patch export is runtime teardown behavior owned by `NodeWorkflowRuntime`, not workflow logic — it runs regardless of workflow exit status and failures are reported on stderr without changing the workflow's reported status. 
When there are no workspace changes, `workspace.patch` is omitted (not created). Non-Docker (local) runs are unaffected. Implementation: `exportWorkspacePatch()` in `src/runtime/docker.ts`, `exportPatchIfDocker()` in `src/runtime/kernel/node-workflow-runtime.ts`. Unit tests for non-empty patch, empty patch, and non-git directory added. Docs updated (`docs/sandboxing.md`, `docs/architecture.md`, `docs/artifacts.md`). - **Feature — Docker:** Default Docker when not CI or unsafe — Docker sandboxing is now **on by default** for local development. When neither `CI=true` nor `JAIPH_UNSAFE=true` is set in the environment, `runtime.docker_enabled` defaults to `true`. In CI environments or when `JAIPH_UNSAFE=true` is set, the default is `false`. Explicit overrides (`JAIPH_DOCKER_ENABLED` env var or in-file `runtime.docker_enabled`) always take precedence over the default rule. `JAIPH_UNSAFE=true` is the new explicit escape hatch for local development when Docker is unwanted. Implementation: `resolveDockerConfig()` in `src/runtime/docker.ts`. Unit tests for all env combinations added. Docs updated (`docs/sandboxing.md`, `docs/configuration.md`). - **Feature — Docker:** Harden Docker execution environment — Docker sandboxing now enforces least-privilege defaults and explicit boundary controls. Containers launch with `--cap-drop ALL --cap-add SYS_ADMIN --security-opt no-new-privileges`, dropping all Linux capabilities except the one required for fuse-overlayfs and preventing privilege escalation. A mount denylist rejects dangerous host paths (`/`, `/var/run/docker.sock`, `/run/docker.sock`, `/proc`, `/sys`, `/dev` and their subpaths) at validation time with `E_VALIDATE_MOUNT` — both in `validateMounts` and at `buildDockerArgs` time. 
An environment variable denylist (`SSH_*`, `GPG_*`, `AWS_*`, `GCP_*`, `AZURE_*`, `GOOGLE_*`, `DOCKER_*`, `KUBE*`, `NPM_TOKEN*`) prevents host credentials from leaking into the container; only `JAIPH_*` (except `JAIPH_DOCKER_*`) and agent prefixes (`ANTHROPIC_*`, `CLAUDE_*`, `CURSOR_*`) cross the container boundary. New exports: `validateMountHostPath`, `isEnvDenied`, `ENV_DENYLIST_PREFIXES`. Documentation adds a threat-model section (what Docker does and does not protect against), a failure-modes reference table (`E_DOCKER_*` / `E_VALIDATE_MOUNT` / `E_TIMEOUT`), expanded network-mode guidance, and the env denylist specification. Implementation: `src/runtime/docker.ts` (mount denylist, env denylist, security flags), `src/runtime/docker.test.ts` (unit tests for all new paths). Docs updated (`docs/sandboxing.md`). diff --git a/QUEUE.md b/QUEUE.md index 68afd335..f4b47f6f 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -13,45 +13,6 @@ Process rules: *** -## Language/Runtime — add `recover` loop semantics for non-isolated `run` #dev-ready - -**Goal** -Add `recover` as a first-class repair-and-retry primitive distinct from `catch`. Ship for non-isolated, non-async `run` first. Async composition lands in the next task, not here. - -**Scope** - -* Keep existing `catch` behavior as one-attempt try/catch. -* Add: - - ```jh - run sth() recover(err) { - ... - } - ``` - - with loop semantics: try, bind failure, run repair block, retry, stop on success or retry-limit exhaustion. -* Add a small explicit retry limit (default 10) with config override. -* Keep the runtime behavior simple and observable; do not introduce speculative control-flow abstractions. - -**Required tests** - -* Parser / formatter / validation coverage for `recover`. -* Runtime tests for: - - success on first attempt - - one or more repair loops before success - - retry limit exhaustion - - retry limit configured via `config` -* At least one acceptance test using `recover` to repair and retry a failing run. 
- -**Acceptance criteria** - -* `recover` is distinct from `catch`. -* The retry limit is explicit and configurable. -* Tests prove loop behavior and limit handling. -* The docs-site Jaiph syntax highlighter (`docs/assets/js/main.js`, the `STATEMENT_KEYWORDS` set and any keyword-flow special cases) recognizes `recover` as a keyword. Any `.jh` code block on the docs site that uses `recover` renders with the keyword colored. - -*** - ## Runtime — spec and implement `Handle` for `run async`, including `recover` composition #dev-ready **Goal** diff --git a/docs/assets/js/main.js b/docs/assets/js/main.js index 79a707ae..13beb83b 100644 --- a/docs/assets/js/main.js +++ b/docs/assets/js/main.js @@ -21,6 +21,7 @@ "test", "ensure", "catch", + "recover", "run", "prompt", "returns", diff --git a/docs/configuration.md b/docs/configuration.md index 5a1b2891..56fc5266 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -136,6 +136,7 @@ These control runtime behavior unrelated to the agent. | `run.logs_dir` | string | `.jaiph/runs` | `JAIPH_RUNS_DIR` | Step log directory. Relative paths are joined with the workspace root; absolute paths are used as-is. | | `run.debug` | boolean | `false` | `JAIPH_DEBUG` | Enables debug tracing for the run. | | `run.inbox_parallel` | boolean | `false` | `JAIPH_INBOX_PARALLEL` | Dispatch inbox route targets concurrently. See [Inbox — Parallel dispatch](inbox.md#parallel-dispatch). | +| `run.recover_limit` | integer | `10` | _(no env override)_ | Maximum number of retry attempts for `run … recover` loops before the step fails. See [Language — `recover`](language.md#recover--repair-and-retry-loop). 
| ### Module keys @@ -323,6 +324,7 @@ Quick reference for all in-file keys and their environment variable equivalents: | `run.logs_dir` | `JAIPH_RUNS_DIR` | | `run.debug` | `JAIPH_DEBUG` | | `run.inbox_parallel` | `JAIPH_INBOX_PARALLEL` | +| `run.recover_limit` | _(no env override)_ | | `runtime.docker_enabled` | `JAIPH_DOCKER_ENABLED` | | `runtime.docker_image` | `JAIPH_DOCKER_IMAGE` | | `runtime.docker_network` | `JAIPH_DOCKER_NETWORK` | diff --git a/docs/grammar.md b/docs/grammar.md index 521355d8..d3287bec 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -42,7 +42,7 @@ The compiler enforces these boundaries at every call site. Using a script where Jaiph enforces a strict boundary between orchestration and execution. Workflows and rules contain only Jaiph steps. Bash lives in `script` bodies. -- **Workflows** — Named sequences of Jaiph steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox `send` (`channel <- …`), `match`, `if`, `run async`, `ensure … catch`, and `run … catch`. Any line that is not a recognized step is a parse error — extract bash to a `script` and call it with `run`. +- **Workflows** — Named sequences of Jaiph steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox `send` (`channel <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`, and `run … recover`. Any line that is not a recognized step is a parse error — extract bash to a `script` and call it with `run`. - **Rules** — Named blocks of structured Jaiph steps: `ensure` (other rules), `run` (scripts only — not workflows), `const`, `match`, `if`, `fail`, `log`/`logerr`, `return "…"`, `ensure … catch`, `run … catch`. Rules cannot use `prompt`, inbox send/route, or `run async`. @@ -392,6 +392,46 @@ Syntax rules: - All call arguments must appear inside the parentheses **before** `catch`. - `catch` must be followed by at least one recovery step after the bindings. 
+### `run … recover` — Repair-and-Retry Loop + +`recover` adds loop semantics to a `run` step. Unlike `catch` (which runs the recovery body once and stops), `recover` retries the target after each repair attempt until it succeeds or the retry limit is exhausted. + +```jaiph +# Single-statement recover +run deploy() recover(err) run fix_deploy() + +# Block recover +run deploy(env) recover(err) { + log "Deploy failed: ${err}" + run auto_repair(env) +} +``` + +**Loop behavior:** + +1. Execute the `run` target. +2. If it succeeds, continue (the `recover` body never runs). +3. If it fails, bind merged stdout+stderr to the binding (e.g. `err`), execute the repair body, then go to step 1. +4. If the retry limit is reached and the target still fails, the step fails with the last error. + +**Retry limit:** Default is **10**. Override per-module with `run.recover_limit`: + +```jaiph +config { + run.recover_limit = 3 +} +``` + +**Bindings** follow the same rules as `catch`: +- Exactly one binding is required. The binding receives merged stdout+stderr from the failed execution. + +Syntax rules: +- `recover` must be followed by `(<binding>)` — bare `recover` or `recover {` without bindings is `E_PARSE`. +- All call arguments must appear inside the parentheses **before** `recover`. +- `recover` must be followed by at least one recovery step after the bindings. +- `recover` and `catch` are mutually exclusive on the same `run` step. +- `recover` is not supported on `ensure` or `run async` steps. + ### `prompt` — Agent Interaction Sends text to the configured agent backend. The prompt body can be supplied in three forms: a single-line string literal, a bare identifier referencing an existing binding, or a triple-quoted multiline block. 
@@ -827,7 +867,7 @@ workflow_config = config_block ; (* optional per-workflow override; must appear before steps; only agent.* and run.* keys allowed; runtime.* and module.* yield E_PARSE *) -workflow_step = ensure_stmt | run_stmt | run_catch_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt +workflow_step = ensure_stmt | run_stmt | run_catch_stmt | run_recover_stmt | run_async_stmt | prompt_stmt | prompt_capture_stmt | const_decl_step | return_stmt | fail_stmt | log_stmt | logerr_stmt | send_stmt | match_stmt | if_stmt | comment_line ; @@ -867,6 +907,7 @@ logerr_stmt = "logerr" ( double_quoted_string | triple_quoted_block | IDENT ensure_stmt = "ensure" call_ref [ "catch" catch_bindings catch_body ] ; run_catch_stmt = "run" call_ref "catch" catch_bindings catch_body ; +run_recover_stmt = "run" call_ref "recover" recover_bindings recover_body ; run_stmt = "run" ( call_ref | inline_script ) ; call_ref = REF "(" [ call_args ] ")" ; (* parentheses always required *) call_arg = double_quoted_string | IDENT | "${" IDENT "}" @@ -881,7 +922,9 @@ returns_schema = "returns" double_quoted_string ; catch_bindings = "(" IDENT ")" ; (* failure payload *) catch_body = single_workflow_stmt | "{" { workflow_step } "}" ; -single_workflow_stmt = ensure_stmt | run_stmt | run_catch_stmt | prompt_stmt | prompt_capture_stmt +recover_bindings = "(" IDENT ")" ; (* failure payload — same as catch *) +recover_body = single_workflow_stmt | "{" { workflow_step } "}" ; +single_workflow_stmt = ensure_stmt | run_stmt | run_catch_stmt | run_recover_stmt | prompt_stmt | prompt_capture_stmt | const_decl_step | return_stmt | fail_stmt | log_stmt | logerr_stmt | send_stmt ; @@ -904,7 +947,7 @@ Validation rules: 4. **Unified namespace:** channels, rules, workflows, scripts, script import aliases, and top-level `const` share one namespace per module. 5. `ensure` must target a rule. `run` in a workflow targets a workflow or script. `run` in a rule targets a script only. 
These rules also apply to `return run` and `return ensure` forms. 6. Channel references in `send` must resolve to declared channels. Route targets on channel declarations must be workflows with exactly 3 parameters. Route declarations inside workflow bodies are rejected at parse time. -7. `ensure … catch` and `run … catch` argument ordering: all arguments inside parentheses before `catch`. +7. `ensure … catch`, `run … catch`, and `run … recover` argument ordering: all arguments inside parentheses before `catch`/`recover`. 8. Shell redirection (`>`, `|`, `&`) after `run`/`ensure` is rejected — use a script. 9. **Type crossing:** `string` and `script` are non-interchangeable primitive types (see [Types](#types)). `prompt` rejects script names; `run` rejects string consts; assigning a script to a `const` or interpolating a script name with `${…}` is rejected. Each crossing produces an actionable `E_VALIDATE` message. 10. **Explicit nested managed calls:** Bare call-like forms in argument position (`run foo(bar())`, `run foo(rule_bar())`) are rejected — add the missing `run` or `ensure` keyword. Bare inline script calls in arguments (`run foo(\`echo aaa\`())`) are also rejected — add `run`. Valid forms: `run foo(run bar())`, `run foo(ensure rule_bar())`, `run foo(run \`echo aaa\`())`. @@ -923,6 +966,7 @@ At runtime, the Node workflow runtime interprets the AST directly: - **Script isolation:** Managed subprocesses with only essential variables. Module-scoped variables not visible. - **Prompt + schema:** JSON extraction and schema validation via the JS kernel. Exit codes: 0=ok, 1=parse error, 2=missing field, 3=type mismatch. - **ensure/run … catch:** On failure, the recovery body runs **once**. There is no retry loop. Requires explicit bindings: `catch (failure) { … }`. The binding gets the merged stdout+stderr from the failed execution. +- **run … recover:** Repair-and-retry loop. 
On failure, the binding gets merged stdout+stderr, the repair body runs, and the target is retried. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. Requires explicit bindings: `recover(err) { … }`. - **Recursion safety:** There is a hard recursion depth limit of 256. Exceeding it produces a runtime error. - **Assignment capture:** Rules and workflows use explicit `return "…"`. Scripts use stdout. - **`run async`:** Promise-based concurrency. Implicit join via `Promise.allSettled` before workflow returns. Failures aggregated. diff --git a/docs/index.html b/docs/index.html index 5c7793bb..9c7c629a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -269,53 +269,55 @@

Samples

-

The ensure … catch pattern checks a rule and, on failure, - runs a recovery block. The catch (failure) binding captures - the merged stdout+stderr from the failed check. - Recovery runs once — for retries, the workflow calls itself - recursively (run default()).

+

The run … recover pattern is a first-class repair-and-retry loop. + When the target fails, the recover(err) body runs, then the target + is retried automatically. The loop stops on success or when the retry limit + (default 10, configurable via run.recover_limit) is exhausted.

#!/usr/bin/env jaiph
 
-# Recursive recovery: when a check fails, prompt an agent to fix it,
-# then retry via run default(). Jaiph CI uses the same pattern to
-# auto-fix failing tests — see .jaiph/ensure_ci_passes.jh
-script check_report = `test -f report.txt`
+# recover loop: when check() fails, fix() repairs the problem,
+# and Jaiph automatically retries check(). No manual recursion needed.
+script check_gate = `test -f .gate_passed`
 
-rule report_exists() {
-  run check_report()
+workflow check() {
+  run check_gate()
+}
+
+script do_fix = `touch .gate_passed`
+
+workflow fix() {
+  run do_fix()
 }
 
 workflow default() {
-  ensure report_exists() catch (failure) {
-    prompt "report.txt is missing. Create it with a short dummy summary."
-    run default()
+  run check() recover(err) {
+    run fix()
   }
 }
-

In the run below, report_exists fails once. The agent creates - report.txt, and the recursive run default() retries - successfully. +

In the run below, check() fails once. The recover body runs + fix(), and Jaiph retries check(), which now succeeds.

➜  ./recover_loop.jh
 
 Jaiph: Running recover_loop.jh
 
 workflow default
-   rule report_exists
-  ·    script check_report
-  ·   ✗ script check_report (0s)
-  ✗ rule report_exists (0s)
-   prompt cursor "report.txt is missin..."
-   prompt cursor (5s)
-   workflow default
-  ·    rule report_exists
-  ·   ·    script check_report
-  ·   ·    script check_report (0s)
-  ·    rule report_exists (0s)
-   workflow default (0.1s)
-
-✓ PASS workflow default (5.5s)
-

Jaiph's own CI uses this same pattern to auto-fix failing tests — see - .jaiph/ensure_ci_passes.jh.

+ workflow check + · script check_gate + · ✗ script check_gate (0s) + ✗ workflow check (0s) + workflow fix + · script do_fix + · script do_fix (0s) + workflow fix (0s) + workflow check + · script check_gate + · script check_gate (0s) + workflow check (0s) + +✓ PASS workflow default (0.5s) +

For one-shot failure handling without retry, use catch instead. See + Language — recover.

Language See Inbox & Dispatch.

Failure recovery. ensure … catch and run … catch - handle failures inline: when a rule or script fails, the recovery body runs once - (like a catch clause). For retries, use explicit recursion. Both forms work in workflows - and rules. See Grammar. + handle failures inline: when a rule or script fails, the recovery body runs once. + For automatic repair-and-retry, use run … recover — a loop that retries + the target after each repair attempt (configurable limit, default 10). catch + works in workflows and rules; recover is workflows-only. See Grammar.

Runtime

@@ -596,8 +599,15 @@

Jaiph workflows

Failure recovery: when the target fails, the recovery body runs once (like a catch clause). catch requires explicit bindings - in parentheses. Works in both workflows and rules. For retries, use explicit - recursion in the recovery body. + in parentheses. Works in both workflows and rules. +
+ +
run ref() recover (err) { … }
+
Repair-and-retry loop: when the target fails, the repair body runs and the target + is retried automatically. Stops on success or when the retry limit is exhausted + (default 10, configurable via run.recover_limit). recover + requires explicit bindings. Workflows only. See + Language.
match var { "lit" => …, /re/ => …, _ => … }
diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md index f170b6ef..9d6964e8 100644 --- a/docs/jaiph-skill.md +++ b/docs/jaiph-skill.md @@ -26,11 +26,11 @@ The **JS kernel** (`src/runtime/kernel/`) handles **prompt** execution, **manage **Concepts:** - **Rules** — Structured checks: `ensure` (other **rules** only), `run` (**scripts** only — not workflows), `const`, `match`, `if`, `fail`, `log`/`logerr`, `return "…"` / `return run script()` / `return ensure rule()`, `ensure … catch`, `run … catch`. No raw shell lines, `prompt`, inbox send/route, or `run async`. Under `jaiph run`, rule bodies are executed **in-process** by the Node runtime; when a rule runs a **script**, that script is a normal managed subprocess (same as scripts from workflows) — see [Sandboxing](sandboxing.md). -- **Workflows** — Named sequences of **Jaiph-only** steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox **send** (`channel_ref <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`. Route declarations (`->`) belong at the top level on `channel` declarations, **not** inside workflow bodies — a `->` inside a body is a parse error. Unrecognized lines are errors — put bash in **`script`** definitions and call with `run`. +- **Workflows** — Named sequences of **Jaiph-only** steps: `ensure`, `run`, `prompt`, `const`, `fail`, `return`, `log`/`logerr`, inbox **send** (`channel_ref <- …`), `match`, `if`, `run async`, `ensure … catch`, `run … catch`, `run … recover`. Route declarations (`->`) belong at the top level on `channel` declarations, **not** inside workflow bodies — a `->` inside a body is a parse error. Unrecognized lines are errors — put bash in **`script`** definitions and call with `run`. - **Scripts** — Top-level **`script`** definitions are **bash (or shebang interpreter) source**, not Jaiph orchestration. Defined with `` script name = `body` `` (single-line backtick) or `` script name = ```[lang] ... ``` `` (fenced block). 
Double-quoted string bodies (`script name = "body"`) and bare identifier bodies (`script name = varName`) are **removed** — both produce parse errors with guidance to use backtick delimiters. The compiler treats all script bodies as **opaque text**: it does not parse lines as Jaiph steps, reject keywords, strip quotes, or validate cross-script calls. This means embedded `node -e` heredocs, inline Python, `const` assignments in JS, and any other valid shell construct compile without interference. Jaiph interpolation (`${...}`) is **forbidden** in **single-line backtick** script bodies — use `$1`, `$2` positional arguments to pass data from orchestration to scripts. In **fenced** (triple-backtick) blocks, `${...}` is passed through to the shell as standard parameter expansion (`${VAR}`, `${VAR:-default}`, etc.). A single-backtick body containing a newline is a hard parse error — use a fenced block for multi-line scripts. Use `return N` / `return $?` for exit status and **stdout** (`echo` / `printf`) for string data to callers. From a **workflow** or **rule**, call with **`run fn()`**. Can be exported (`export script name = ...`) for use by importing modules. Cannot be used with `ensure`, are not valid inbox route targets, and must not be invoked through `$(...)` or as a bare shell step. **Polyglot scripts:** use a fence lang tag (`` ```<tag> ``) to select an interpreter — the tag maps directly to `#!/usr/bin/env <tag>`. Any tag is valid (no hardcoded allowlist). For example: `` ```node ``, `` ```python3 ``, `` ```ruby ``, `` ```lua ``. Alternatively, if no fence tag is present, the first non-empty body line may start with `#!` (e.g. `#!/usr/bin/env lua`), which becomes the script's shebang and the body is emitted verbatim (you cannot combine a fence tag with a manual shebang — that is an error). 
Without either, `#!/usr/bin/env bash` is used and the emitter applies only lightweight bash-specific transforms (`return` normalization, `local`/`export`/`readonly` spacing, import alias resolution). Scripts are extracted to a `scripts/` directory under the run output tree (`jaiph run --target <dir>` sets that tree; without `--target` the CLI uses a temporary directory) and executed via **`JAIPH_SCRIPTS`**. **Inline scripts:** For trivial one-off commands, use `` run `body`(args) `` or `` run ```lang...body...```(args) `` directly in a workflow or rule step instead of declaring a named `script` definition. The body (single backtick for one-liners or triple backtick for multi-line) comes before the parentheses; optional comma-separated arguments go inside the parentheses: `` run `echo $1`("hello") ``. Fenced blocks support lang tags for polyglot inline scripts: `` run ```python3 ... ```() ``. Capture forms: `` const x = run `echo val`() `` and `` const x = run ```...```() ``. The old `run script() "body"` form is **removed** — use the backtick forms instead. Inline scripts use deterministic hash-based artifact names (`__inline_<hash>`) and run with the same isolation as named scripts. `run async` with inline scripts is not supported. - **Channels** — Top-level `channel <name> [-> workflow, ...]` declarations with optional inline routing; **send** uses `channel_ref <- …`. Routes are declared on the channel declaration, not inside workflow bodies (see [Inbox & Dispatch](inbox.md)). Channel names share the per-module namespace with rules, workflows, scripts, and module-scoped `local` / `const` variables. -Step semantics (`ensure`, `run`, `prompt`, `catch`, `match`, `if`, `log`, `fail`, `return`, `send`, `run async`) are detailed in the **Steps** section below. +Step semantics (`ensure`, `run`, `prompt`, `catch`, `recover`, `match`, `if`, `log`, `fail`, `return`, `send`, `run async`) are detailed in the **Steps** section below. **Audience:** Agents that produce or edit `.jh` files. 
@@ -95,7 +95,7 @@ Prefer composable modules over one large file. - **Module-scoped variables:** `local name = value` or `const name = value` (same value forms). Prefer **`const`** for new files. Values can be single-line `"..."` strings, triple-quoted `"""..."""` multiline strings, or bare tokens. A double-quoted string that spans multiple lines is rejected — use `"""..."""` instead. Accessible as `${name}` inside orchestration strings in the same module. Names share the unified namespace with channels, rules, workflows, and scripts — duplicates are `E_PARSE`. Not exportable; module-scoped only. - **Steps:** - **ensure** — `ensure ref` or `ensure ref([args...])` runs a rule (local or `alias.rule_name`). **Parentheses are optional when passing zero arguments** — `ensure check` is equivalent to `ensure check()`. When arguments are present, parentheses are required with comma-separated expressions. **Bare identifier arguments** are supported and preferred: `ensure check(status)` is equivalent to `ensure check("${status}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead. Optionally `ensure ref([args]) catch (<failure>) <body>` or `ensure ref([args]) catch (<failure>, <attempt>) <body>`: the recovery body runs **once** on failure (like a catch clause). There is no retry loop — for retries, use explicit recursion. The first binding (e.g. `failure`) receives the full merged stdout+stderr from the failed rule execution, including output from nested scripts and rules. The optional second binding (e.g. `attempt`) receives the attempt number (always `"1"`). Full output still lives in step **`.out` / `.err`** artifacts. If the failure binding is empty for your rule, persist diagnostics before prompting or assert non-empty. Works in both workflows and rules. 
- - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). **Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<failure>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above. + - **run** — `run ref` or `run ref([args...])` runs a workflow or script (local or `alias.name`). **Parentheses are optional when passing zero arguments** — `run setup` is equivalent to `run setup()`. When arguments are present, parentheses are required with comma-separated expressions. **`run` does not forward args by default** — pass named params explicitly (e.g. `run wf(task)`, `run util_fn(name)`). 
**Bare identifier arguments** are supported and preferred: `run greet(name)` is equivalent to `run greet("${name}")` — the identifier must reference a known variable (`const`, capture, or named parameter); unknown names fail with `E_VALIDATE`. **Standalone `"${identifier}"` in call arguments is rejected** — use the bare form instead (e.g. `run greet(name)` not `run greet("${name}")`). Quoted strings with additional text around the interpolation (e.g. `"prefix_${name}"`) are still allowed. Jaiph keywords cannot be used as bare identifiers. **Nested managed calls in arguments** are supported with explicit keywords: `run foo(run bar())`, `run foo(ensure check())`, `run foo(run \`echo ok\`())`. Bare call-like forms in arguments (`run foo(bar())`, `run foo(\`echo ok\`())`) are rejected — add the `run` or `ensure` keyword. Optionally `run ref([args]) catch (<failure>) <body>`: the recovery body runs **once** on failure (same semantics as `ensure … catch`). Works in both workflows and rules. Optionally `run ref([args]) recover (<name>) <body>`: repair-and-retry loop — on failure, binds error output, runs the repair body, and retries the target. Loop stops on success or when `run.recover_limit` (default 10) is exhausted. `recover` and `catch` are mutually exclusive on the same step. Workflows only. Also supports **inline scripts**: `` run `body`(args) `` or `` run ```lang...body...```(args) `` — see Scripts section above. - **log** — `log "message"` writes the expanded message to **stdout** and emits a **`LOG`** event; the CLI shows it in the progress tree at the current depth. Double-quoted string; `${identifier}` interpolation works at runtime. For multiline messages, use triple quotes: `log """..."""`. **Bare identifier form:** `log foo` (no quotes) expands to `log "${foo}"` — the variable's value is logged. Works with `const`, capture, and named parameters. 
**Inline capture interpolation** is also supported: `${run ref([args])}` and `${ensure ref([args])}` execute a managed call and inline the result (e.g. `log "Got: ${run greet()}"`). Nested inline captures are rejected. **`LOG`** events and `run_summary.jsonl` store the **same** message string (JSON-escaped for the payload). No spinner, no timing — a static annotation. See [CLI Reference](cli.md) for tree formatting. Useful for marking workflow phases (e.g. `log "Starting analysis phase"`). - **logerr** — `logerr "message"` is identical to `log` except the message goes to **stderr** and the event type is **`LOGERR`**. In the progress tree, `logerr` lines use a red `!` instead of the dim `ℹ` used by `log`. Same quoting, interpolation, bare identifier, and triple-quote rules as `log` (e.g. `logerr err_msg`, `logerr """..."""`). - **Send** — After `<-`, use a **double-quoted literal**, **triple-quoted block** (`channel <- """..."""`), **`${var}`**, or **`run ref([args])`**. An explicit RHS is always required — bare `channel <-` (forward syntax) has been removed. Raw shell on the RHS is rejected — use `const x = run helper()` then `channel <- "${x}"`, or `channel <- run fmt_fn()`. Combining capture and send (`name = channel <- …`) is `E_PARSE`. See [Inbox & Dispatch](inbox.md). @@ -111,12 +111,18 @@ Prefer composable modules over one large file. **Quick reference examples:** ```jaiph -# catch — failure handling with retry via recursion +# catch — one-shot failure handling ensure ci_passes() catch (failure) { prompt "CI failed — fix the code." 
run deploy(env) } +# recover — repair-and-retry loop (retries until success or limit) +run deploy(env) recover(err) { + log "Deploy failed: ${err}" + run auto_repair(env) +} + # match — value branching (statement and expression forms) const label = match status { "ok" => "success" diff --git a/docs/language.md b/docs/language.md index c1a82d16..e9528b76 100644 --- a/docs/language.md +++ b/docs/language.md @@ -192,7 +192,7 @@ workflow deploy(env, version) { } ``` -Workflows support all step types: `run`, `ensure`, `prompt`, `const`, `log`, `logerr`, `fail`, `return`, `send`, `match`, `if`, `run async`, and `catch`. +Workflows support all step types: `run`, `ensure`, `prompt`, `const`, `log`, `logerr`, `fail`, `return`, `send`, `match`, `if`, `run async`, `catch`, and `recover`. ### Rules @@ -355,6 +355,51 @@ workflow deploy(env) { Bare `catch` without a binding is a parse error. All call arguments must appear inside parentheses before `catch`. +### `recover` — Repair-and-Retry Loop + +`recover` is a first-class retry primitive for `run` steps. Unlike `catch` (which runs the recovery body once), `recover` implements a **loop**: try the target, and if it fails, bind the error, run the repair body, then retry. The loop stops when the target succeeds or when the retry limit is exhausted. + +```jaiph +# Single-statement recovery loop +run deploy() recover(err) run fix_deploy() + +# Block recovery loop +run deploy(env) recover(err) { + log "Deploy failed: ${err}" + run auto_repair(env) +} +``` + +**Semantics:** + +1. Execute the `run` target. +2. If it succeeds, continue (the `recover` body never runs). +3. If it fails, bind merged stdout+stderr to the `recover` binding (e.g. `err`), execute the repair body, then go to step 1. +4. If the retry limit is reached and the target still fails, the step fails with the last error. + +**Retry limit:** The default limit is **10** attempts. 
Override it per-module with the `run.recover_limit` config key: + +```jaiph +config { + run.recover_limit = 3 +} + +workflow default() { + run flaky_step() recover(err) { + log "Retrying after: ${err}" + run repair() + } +} +``` + +**Capture:** When the target eventually succeeds, `const name = run ref() recover(err) { … }` captures the result (same rules as plain `run` — `return` value for workflows, stdout for scripts). + +**Constraints:** +- `recover` requires exactly one binding: `recover(name)`. Bare `recover` without bindings is a parse error. +- All call arguments must appear inside parentheses **before** `recover`. +- `recover` is available on `run` steps in workflows only (not `ensure`, not `run async`). +- `recover` and `catch` are mutually exclusive on the same step — use one or the other. + ### `prompt` — Agent Interaction Sends text to the configured agent backend. Three body forms: diff --git a/e2e/test_all.sh b/e2e/test_all.sh index b3df284b..847791f2 100755 --- a/e2e/test_all.sh +++ b/e2e/test_all.sh @@ -79,6 +79,7 @@ TEST_SCRIPTS=( "e2e/tests/126_file_shorthand_routing.sh" "e2e/tests/127_cli_edge_cases.sh" "e2e/tests/128_examples_format_check.sh" + "e2e/tests/130_run_recover_loop.sh" ) PASS_COUNT=0 diff --git a/e2e/tests/130_run_recover_loop.sh b/e2e/tests/130_run_recover_loop.sh new file mode 100755 index 00000000..4d96a217 --- /dev/null +++ b/e2e/tests/130_run_recover_loop.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +source "${ROOT_DIR}/e2e/lib/common.sh" +trap e2e::cleanup EXIT + +e2e::prepare_test_env "run_recover_loop" +TEST_DIR="${JAIPH_E2E_TEST_DIR}" + +# === Scenario: recover repairs then retries successfully === +e2e::section "recover loop: fail first, repair, pass on retry" +rm -f "${TEST_DIR}/.gate_passed" + +e2e::file "recover_repair.jh" <<'EOF' +script check_gate = `test -f .gate_passed` +workflow check() { + run check_gate() +} + +script do_fix = `touch .gate_passed` +workflow fix() { + run do_fix() +} + +workflow default() { + run check() recover(err) { + run fix() + } +} +EOF + +out="$(e2e::run "recover_repair.jh" 2>&1)" + +e2e::assert_file_exists "${TEST_DIR}/.gate_passed" "recover body ran (marker created)" +e2e::expect_stdout "${out}" <<'EOF' + +Jaiph: Running recover_repair.jh + +workflow default + ▸ workflow check + · ▸ script check_gate + · ✗ script check_gate (