From 265250fc5ce59a804a9840fc705601330384027b Mon Sep 17 00:00:00 2001
From: "g. nicholas d'andrea" <nick@gnidan.org>
Date: Thu, 16 Apr 2026 03:51:33 -0400
Subject: [PATCH 1/3] format: add transform context for compiler optimizations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new context type annotating instructions with the
compiler transformations that produced them. The value is an
array of short identifiers; the list may repeat the same
identifier when the transformation has been applied multiple
times (e.g., ["inline", "inline"] for doubly-inlined code).

Transform is *additional* annotation. The invoke/return contexts
for the logical call are still emitted at the call boundary so
debuggers see the source-level call stack; the transform context
tells debuggers how the call was physically realized. Consumers
that ignore transform contexts get a sound source-level view
from the semantic contexts alone.

v1 identifiers:
  - "inline": marked instruction is part of an inlined function
    body; surrounding invoke/return contexts name the inlined
    callee.
  - "tailcall": marked instruction is a tail-call-optimized
    back-edge JUMP or continuation, where the call was realized
    without pushing/popping a full activation.

The identifier set is extensible. Debuggers unfamiliar with a
given identifier should preserve it as an opaque label. Order
in the array is not semantically significant — the multiset is
what matters.

Unblocks the final shape of TCO back-edge annotations in
bugc (#210): a tail-call-optimized JUMP can now carry
`gather: [return, invoke, transform: ["tailcall"]]`.

Includes:
- schemas/program/context/transform.schema.yaml
- schemas/program/context.schema.yaml: wire into the if/$ref
  union.
- packages/format/src/types/program/context.ts: Context.Transform
  interface, isTransform guard, and Transform.Identifier union
  preserving autocomplete for known values.
- packages/format/src/types/program/context.test.ts: register
  Context.isTransform with the schema guard test harness.
- packages/web/spec/program/context/transform.mdx: spec page
  covering role, v1 identifiers, repetition/composition, and
  interaction with gather.
---
 .../format/src/types/program/context.test.ts  |  4 +
 packages/format/src/types/program/context.ts  | 25 ++++-
 .../web/spec/program/context/transform.mdx    | 91 +++++++++++++++++++
 schemas/program/context.schema.yaml           |  8 ++
 schemas/program/context/transform.schema.yaml | 62 +++++++++++++
 5 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 packages/web/spec/program/context/transform.mdx
 create mode 100644 schemas/program/context/transform.schema.yaml

diff --git a/packages/format/src/types/program/context.test.ts b/packages/format/src/types/program/context.test.ts
index 4470a322d..a36e1e4c7 100644
--- a/packages/format/src/types/program/context.test.ts
+++ b/packages/format/src/types/program/context.test.ts
@@ -46,4 +46,8 @@ testSchemaGuards("ethdebug/format/program/context", [
     schema: "schema:ethdebug/format/program/context/function/revert",
     guard: Context.isRevert,
   },
+  {
+    schema: "schema:ethdebug/format/program/context/transform",
+    guard: Context.isTransform,
+  },
 ] as const);
diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts
index 104f27196..24c92a915 100644
--- a/packages/format/src/types/program/context.ts
+++ b/packages/format/src/types/program/context.ts
@@ -11,7 +11,8 @@ export type Context =
   | Context.Frame
   | Context.Invoke
   | Context.Return
-  | Context.Revert;
+  | Context.Revert
+  | Context.Transform;
 
 export const isContext = (value: unknown): value is Context =>
   [
@@ -24,6 +25,7 @@ export const isContext = (value: unknown): value is Context =>
     Context.isInvoke,
     Context.isReturn,
     Context.isRevert,
+    Context.isTransform,
   ].some((guard) => guard(value));
 
 export namespace Context {
@@ -274,4 +276,25 @@ export namespace Context {
       (!("reason" in value) || Function.isPointerRef(value.reason)) &&
       (!("panic" in value) || typeof value.panic === "number");
   }
+
+  export interface Transform {
+    transform: Transform.Identifier[];
+  }
+
+  export const isTransform = (value: unknown): value is Transform =>
+    typeof value === "object" &&
+    !!value &&
+    "transform" in value &&
+    Array.isArray(value.transform) &&
+    value.transform.length > 0 &&
+    value.transform.every(
+      (item) => typeof item === "string" && item.length > 0,
+    );
+
+  export namespace Transform {
+    // Recognized v1 identifiers. Unknown strings are permitted
+    // (the identifier set is extensible); the union preserves
+    // autocomplete for known values.
+    export type Identifier = "inline" | "tailcall" | (string & {});
+  }
 }
diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx
new file mode 100644
index 000000000..81d88677b
--- /dev/null
+++ b/packages/web/spec/program/context/transform.mdx
@@ -0,0 +1,91 @@
+---
+sidebar_position: 8
+---
+
+import SchemaViewer from "@site/src/components/SchemaViewer";
+
+# Transform contexts
+
+A transform context annotates an instruction with the compiler
+transformations that produced it. The value is a list of short
+identifiers; the list may repeat the same identifier when the
+transformation has been applied multiple times—for example,
+doubly-inlined code carries `transform: ["inline", "inline"]`.
+
+<SchemaViewer
+  schema={{ id: "schema:ethdebug/format/program/context/transform" }}
+/>
+
+## Role: additional annotation
+
+A transform context does not replace semantic contexts. When the
+compiler inlines a function, the caller's debug info should still
+carry invoke/return contexts naming the inlined callee at the
+call boundary—so the debugger's logical call stack reflects the
+source-level structure. The transform context is _additional_
+information telling the debugger **how** the call was realized.
+
+Consumers are free to ignore transform contexts entirely; the
+invoke/return contexts alone always give a sound source-level
+view. Consumers that understand transform contexts can offer
+optimization-aware presentations:
+
+- Render inlined code as a collapsible block tied to the
+  original callee's source location.
+- Show which call sites were tail-call-optimized vs. realized as
+  full call/return sequences.
+- Explain apparent anomalies in the trace (e.g., a JUMP that
+  carries both return and invoke contexts is a TCO back-edge).
+
+## v1 identifiers
+
+Two identifiers are recognized in v1:
+
+- **`"inline"`** — the marked instruction is part of an inlined
+  function body. Surrounding invoke/return contexts name the
+  inlined callee; this marker tells the debugger the physical
+  code does not correspond to a separate activation record.
+- **`"tailcall"`** — the marked instruction is a
+  tail-call-optimized back-edge JUMP or continuation, where the
+  call was realized without pushing/popping a full activation.
+  A JUMP carrying a `tailcall` transform often co-occurs with a
+  `gather` context combining a return (from the previous
+  iteration) and an invoke (of the new iteration).
+
+The identifier set is extensible. Compilers may emit additional
+identifiers for optimizations not yet standardized; debuggers
+should preserve unfamiliar identifiers as opaque labels rather
+than rejecting them.
+
+## Repetition and composition
+
+Identifiers may repeat. A function inlined into another inlined
+function produces `transform: ["inline", "inline"]`. Different
+transformations compose:
+`transform: ["inline", "tailcall"]` marks an instruction inside
+an inlined body that was itself a TCO back-edge in the callee.
+
+Order in the array is not semantically significant—only the
+multiset of identifiers matters.
+
+## Composing with other contexts
+
+To carry a transform context alongside other contexts at the
+same instruction, use `gather`. A TCO back-edge JUMP, for
+example, typically combines three facts:
+
+```yaml
+gather:
+  - return:
+      identifier: "fact"
+      declaration: { ... }
+  - invoke:
+      jump: true
+      identifier: "fact"
+      target: { pointer: { location: code, offset: ... } }
+  - transform: ["tailcall"]
+```
+
+The return and invoke state the source-level facts (iteration N
+returned, iteration N+1 was invoked); the transform explains
+how the compiler realized that pair as a single JUMP.
diff --git a/schemas/program/context.schema.yaml b/schemas/program/context.schema.yaml
index a57fce654..1a82e76df 100644
--- a/schemas/program/context.schema.yaml
+++ b/schemas/program/context.schema.yaml
@@ -89,6 +89,14 @@ allOf:
       description: |
         Indicates association with a function revert.
       $ref: "schema:ethdebug/format/program/context/function/revert"
+  - if:
+      required: ["transform"]
+    then:
+      description: |
+        Compiler transformations applied to produce this instruction
+        (e.g., inlining, tail-call optimization). Additional
+        annotation — does not replace semantic contexts.
+      $ref: "schema:ethdebug/format/program/context/transform"
 
 unevaluatedProperties: false
 
diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml
new file mode 100644
index 000000000..ea9ab4aaf
--- /dev/null
+++ b/schemas/program/context/transform.schema.yaml
@@ -0,0 +1,62 @@
+$schema: "https://json-schema.org/draft/2020-12/schema"
+$id: "schema:ethdebug/format/program/context/transform"
+
+title: ethdebug/format/program/context/transform
+description: |
+  Annotates an instruction with compiler transformations that
+  produced it. The value is a list of short identifiers naming
+  each transformation; the list may repeat an identifier when
+  the same transformation has been applied more than once (e.g.,
+  `["inline", "inline"]` for doubly-inlined code).
+
+  A transform context is *additional* annotation — it does not
+  replace semantic contexts. When the compiler inlines a
+  function, the invoke/return contexts for the logical call
+  should still be emitted at the call boundary so the debugger's
+  source-level call stack remains coherent. The transform
+  context tells debuggers **how** the call was realized.
+
+  Consumers that ignore transform contexts still get a sound
+  source-level view from the invoke/return contexts alone.
+  Consumers that understand transform contexts can offer
+  optimization-aware presentations — e.g., rendering inlined
+  code as a collapsible block, or reconciling tail-call-optimized
+  back-edges with the logical call stack.
+
+  The identifier set is extensible. v1 defines:
+
+  - `"inline"` — the marked instruction is part of an inlined
+    function body. Surrounding invoke/return contexts name the
+    inlined callee.
+  - `"tailcall"` — the marked instruction is a
+    tail-call-optimized back-edge JUMP or continuation, where
+    the call was realized as a direct jump (or reuse of the
+    caller's frame) rather than a standard call/return sequence.
+
+  Debuggers unfamiliar with a given identifier should preserve
+  it as an opaque label.
+
+  Order in the array is not semantically significant — only the
+  multiset of identifiers matters.
+
+type: object
+properties:
+  transform:
+    title: Applied transformations
+    description: |
+      List of transformation identifiers. Identifiers may
+      repeat; order is not semantically significant.
+    type: array
+    items:
+      type: string
+      minLength: 1
+    minItems: 1
+
+required:
+  - transform
+
+examples:
+  - transform: ["inline"]
+  - transform: ["tailcall"]
+  - transform: ["inline", "inline"]
+  - transform: ["inline", "tailcall"]

From 9562543291e3df2aa20705be1343165a777116d5 Mon Sep 17 00:00:00 2001
From: "g. nicholas d'andrea" <nick@gnidan.org>
Date: Thu, 16 Apr 2026 03:54:38 -0400
Subject: [PATCH 2/3] format: expand transform v1 vocabulary with fold and
 coalesce
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds two more identifiers to the v1 transform context
vocabulary, based on an audit of the transformations the bugc
optimizer currently performs or will perform:

  - "fold" — compile-time constant folding. The marked
    instruction carries the result (typically a PUSH) replacing
    a compute sequence that appeared in source.
  - "coalesce" — read-write merging. The marked instruction is
    part of a SHL/OR sequence (or similar) introduced by the
    compiler to combine adjacent source-level reads or writes,
    such as packing narrower fields into a single storage slot.

Together with the previously-defined "inline" and "tailcall",
this covers the four transformations bugc emits today or will
emit in the near term (inline once a function inlining pass
lands). Propagate was considered for v1 and deferred as
borderline.

Updates:
- transform.schema.yaml: description enumerates the four v1
  identifiers; examples include single-identifier cases for
  each plus combinations ["inline", "fold"], ["coalesce",
  "coalesce"].
- context.ts: Transform.Identifier union extended with "fold"
  and "coalesce" (still keeps `string & {}` for extensibility
  and autocomplete).
- transform.mdx: subsection for each identifier with a concrete
  EVM-level example, updated repetition/composition section
  with new combinations.
---
 packages/format/src/types/program/context.ts  |  7 ++++-
 .../web/spec/program/context/transform.mdx    | 28 ++++++++++++++++---
 schemas/program/context/transform.schema.yaml | 12 ++++++++
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts
index 24c92a915..d9c96c23f 100644
--- a/packages/format/src/types/program/context.ts
+++ b/packages/format/src/types/program/context.ts
@@ -295,6 +295,11 @@ export namespace Context {
     // Recognized v1 identifiers. Unknown strings are permitted
     // (the identifier set is extensible); the union preserves
     // autocomplete for known values.
-    export type Identifier = "inline" | "tailcall" | (string & {});
+    export type Identifier =
+      | "inline"
+      | "tailcall"
+      | "fold"
+      | "coalesce"
+      | (string & {});
   }
 }
diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx
index 81d88677b..b52341d21 100644
--- a/packages/web/spec/program/context/transform.mdx
+++ b/packages/web/spec/program/context/transform.mdx
@@ -39,7 +39,7 @@ optimization-aware presentations:
 
 ## v1 identifiers
 
-Two identifiers are recognized in v1:
+Four identifiers are recognized in v1:
 
 - **`"inline"`** — the marked instruction is part of an inlined
   function body. Surrounding invoke/return contexts name the
@@ -51,6 +51,21 @@ Two identifiers are recognized in v1:
   A JUMP carrying a `tailcall` transform often co-occurs with a
   `gather` context combining a return (from the previous
   iteration) and an invoke (of the new iteration).
+- **`"fold"`** — the marked instruction carries the result of
+  a compile-time constant fold. Typically a PUSH of the folded
+  value replacing a compute sequence (e.g., `ADD` over two
+  known constants) that appeared in source. The instruction's
+  surrounding `code` context, if present, points to the
+  original expression.
+- **`"coalesce"`** — the marked instruction is part of a
+  read-write merging sequence the compiler introduced to
+  combine adjacent source-level reads or writes. Common
+  examples include SHL/OR sequences that pack narrower fields
+  into a single storage slot, or wider loads split into
+  narrower field extractions. The user did not write these
+  instructions directly; the `coalesce` marker lets a debugger
+  present the sequence as one source-level operation rather
+  than stepping through each byte-shuffling opcode.
 
 The identifier set is extensible. Compilers may emit additional
 identifiers for optimizations not yet standardized; debuggers
@@ -60,10 +75,15 @@ than rejecting them.
 ## Repetition and composition
 
 Identifiers may repeat. A function inlined into another inlined
-function produces `transform: ["inline", "inline"]`. Different
-transformations compose:
+function produces `transform: ["inline", "inline"]`. A coalesce
+sequence nested inside another coalesced region produces
+`transform: ["coalesce", "coalesce"]`.
+
+Different transformations compose:
 `transform: ["inline", "tailcall"]` marks an instruction inside
-an inlined body that was itself a TCO back-edge in the callee.
+an inlined body that was itself a TCO back-edge in the callee;
+`transform: ["inline", "fold"]` marks a constant-folded PUSH
+sitting inside an inlined body.
 
 Order in the array is not semantically significant—only the
 multiset of identifiers matters.
diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml
index ea9ab4aaf..313843da4 100644
--- a/schemas/program/context/transform.schema.yaml
+++ b/schemas/program/context/transform.schema.yaml
@@ -32,6 +32,14 @@ description: |
     tail-call-optimized back-edge JUMP or continuation, where
     the call was realized as a direct jump (or reuse of the
     caller's frame) rather than a standard call/return sequence.
+  - `"fold"` — the marked instruction carries the result of a
+    compile-time constant fold. Typically a PUSH of the folded
+    value, replacing a compute sequence that appeared in source.
+  - `"coalesce"` — the marked instruction is part of a
+    read-write merging sequence (e.g., SHL/OR sequences packing
+    narrower fields into a wider word) that the user did not
+    explicitly write; the compiler introduced it to combine
+    adjacent source-level reads or writes.
 
   Debuggers unfamiliar with a given identifier should preserve
   it as an opaque label.
@@ -58,5 +66,9 @@ required:
 examples:
   - transform: ["inline"]
   - transform: ["tailcall"]
+  - transform: ["fold"]
+  - transform: ["coalesce"]
   - transform: ["inline", "inline"]
   - transform: ["inline", "tailcall"]
+  - transform: ["inline", "fold"]
+  - transform: ["coalesce", "coalesce"]

From 7ac333d72ec0bff0ff8d7cb6ee0d24ff54a004d7 Mon Sep 17 00:00:00 2001
From: "g. nicholas d'andrea" <nick@gnidan.org>
Date: Thu, 16 Apr 2026 05:05:03 -0400
Subject: [PATCH 3/3] format: prefer flat context composition, document gather
 scope

The context schema's discriminator keys combine via allOf of
if/then rules, so a single context object can carry multiple
keys at once (e.g., `invoke`, `return`, and `transform` all
side by side). Use gather only when two contexts would collide
on the same key.

- transform spec: switch the TCO back-edge example from gather
  to the flat form; revise the tailcall bullet accordingly
- transform schema: note in the description that flat
  composition is preferred; gather is for key collisions
- gather spec: add a "When to use" section flagging the flat
  form as the default and listing the canonical collision
  cases (multiple frames, multiple variables blocks)
---
 packages/web/spec/program/context/gather.mdx  | 28 ++++++++++++
 .../web/spec/program/context/transform.mdx    | 44 +++++++++++--------
 schemas/program/context/transform.schema.yaml |  5 +++
 3 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/packages/web/spec/program/context/gather.mdx b/packages/web/spec/program/context/gather.mdx
index eb9cc3613..0501ed70d 100644
--- a/packages/web/spec/program/context/gather.mdx
+++ b/packages/web/spec/program/context/gather.mdx
@@ -6,6 +6,34 @@ import SchemaViewer from "@site/src/components/SchemaViewer";
 
 # Gather multiple contexts
 
+A `gather` context asserts that every one of its child contexts
+holds at the marked instruction. It is the tool for composing
+multiple context facts that cannot coexist as sibling keys on a
+single object.
+
 <SchemaViewer
   schema={{ id: "schema:ethdebug/format/program/context/gather" }}
 />
+
+## When to use `gather`
+
+The context schema is composable: a single context object may
+carry any number of discriminator keys together — `code`,
+`variables`, `invoke`, `return`, `transform`, and so on all
+compose as siblings on the same object. Prefer the flat form
+when it works.
+
+Reach for `gather` only when two or more facts would collide on
+the same key. The canonical cases are:
+
+- **Multiple `frame`s** — an instruction that maps
+  simultaneously to an IR step and a source step needs one
+  entry per frame, each with its own `code` range.
+- **Multiple `variables` blocks** — when separate pipeline
+  passes each contribute variable information (e.g., one
+  names the variable, the other supplies its pointer), each
+  set lives in its own context.
+
+If every child context uses a different discriminator key, a
+`gather` can be collapsed into a single flat object with the
+same meaning — and that flat form is the preferred style.
diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx
index b52341d21..427e42eb6 100644
--- a/packages/web/spec/program/context/transform.mdx
+++ b/packages/web/spec/program/context/transform.mdx
@@ -48,9 +48,9 @@ Four identifiers are recognized in v1:
 - **`"tailcall"`** — the marked instruction is a
   tail-call-optimized back-edge JUMP or continuation, where the
   call was realized without pushing/popping a full activation.
-  A JUMP carrying a `tailcall` transform often co-occurs with a
-  `gather` context combining a return (from the previous
-  iteration) and an invoke (of the new iteration).
+  A JUMP carrying a `tailcall` transform typically sits on a
+  context that also carries both a `return` (from the previous
+  iteration) and an `invoke` (of the new iteration).
 - **`"fold"`** — the marked instruction carries the result of
   a compile-time constant fold. Typically a PUSH of the folded
   value replacing a compute sequence (e.g., `ADD` over two
@@ -90,22 +90,30 @@ multiset of identifiers matters.
 
 ## Composing with other contexts
 
-To carry a transform context alongside other contexts at the
-same instruction, use `gather`. A TCO back-edge JUMP, for
-example, typically combines three facts:
+A context object can carry several discriminator keys at once —
+`code`, `variables`, `invoke`, `return`, `transform`, and so on
+all live in the same object. A TCO back-edge JUMP, for example,
+typically combines three facts as sibling keys on a single
+context:
 
 ```yaml
-gather:
-  - return:
-      identifier: "fact"
-      declaration: { ... }
-  - invoke:
-      jump: true
-      identifier: "fact"
-      target: { pointer: { location: code, offset: ... } }
-  - transform: ["tailcall"]
+return:
+  identifier: "fact"
+  declaration: { ... }
+invoke:
+  jump: true
+  identifier: "fact"
+  target: { pointer: { location: code, offset: ... } }
+transform: ["tailcall"]
 ```
 
-The return and invoke state the source-level facts (iteration N
-returned, iteration N+1 was invoked); the transform explains
-how the compiler realized that pair as a single JUMP.
+The `return` and `invoke` state the source-level facts
+(iteration N returned, iteration N+1 was invoked); the
+`transform` explains how the compiler realized that pair as a
+single JUMP.
+
+Reach for [`gather`](/spec/program/context/gather) only when
+two contexts would collide on the same key — e.g., two
+independent `variables` blocks or two `frame`s from different
+pipeline stages. When keys don't collide, the flat form is
+preferred.
diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml
index 313843da4..8951a00ca 100644
--- a/schemas/program/context/transform.schema.yaml
+++ b/schemas/program/context/transform.schema.yaml
@@ -16,6 +16,11 @@ description: |
   source-level call stack remains coherent. The transform
   context tells debuggers **how** the call was realized.
 
+  Combine a transform with other discriminator keys (`invoke`,
+  `return`, `code`, etc.) by placing them side-by-side on the
+  same context object — `gather` is only needed when two
+  contexts would collide on the same key.
+
   Consumers that ignore transform contexts still get a sound
   source-level view from the invoke/return contexts alone.
   Consumers that understand transform contexts can offer