From 2fd42f13a4833abc5869ca0f91ab439238d23d91 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 18:41:59 +0000 Subject: [PATCH 01/21] Hotfix extern resource fields and pending BG promise tests --- spec/annotator_spec.rb | 21 +++++++++++++ spec/transpiler_spec.rb | 66 +++++++++++++++++++++++++++++++++++++++++ src/ast/schemas.rb | 7 +++-- 3 files changed, 91 insertions(+), 3 deletions(-) diff --git a/spec/annotator_spec.rb b/spec/annotator_spec.rb index 4443fa922..e9f7025d3 100644 --- a/spec/annotator_spec.rb +++ b/spec/annotator_spec.rb @@ -2945,6 +2945,27 @@ def annotate_extern(source) end end + context "EXTERN STRUCT declaration with CLOSE" do + let(:code) { + <<~CLEAR + EXTERN STRUCT JsonRecord { id: Int64, data: Int64[] }; + EXTERN STRUCT Parsed { + value: JsonRecord + } CLOSE "deinit" AS "Parsed(JsonRecord)" FROM "std.json"; + FN use_parsed(parsed: Parsed) RETURNS Int64 -> + RETURN parsed.value.id; + END + CLEAR + } + + it "keeps fields accessible on the resource wrapper" do + ast = annotate_extern(code) + fn = ast.statements.last + ret = fn.body.last + expect(ret.value.resolved_type).to eq(:Int64) + end + end + context "EXTERN STRUCT without FROM (local Zig struct)" do let(:code) { <<~CLEAR diff --git a/spec/transpiler_spec.rb b/spec/transpiler_spec.rb index 239926a84..6ea7edbbd 100644 --- a/spec/transpiler_spec.rb +++ b/spec/transpiler_spec.rb @@ -9,6 +9,72 @@ def transpile(src) ZigTranspiler.new.transpile(src) end + describe "BG promise capture regressions" do + it "allows footguns/06-style consumer BG to NEXT a producer promise captured from the same scope" do + pending("MIR capture classification currently refuses captured Promise handles as unclassified_capture") + src = <<~CLEAR + STRUCT State { + message: String + } + + FN main() RETURNS Void -> + s = State{ message: "" } @shared:locked; + + producer = BG { + WITH EXCLUSIVE s AS inner { + inner.message = "hello from producer"; + } + }; + + consumer = BG { + NEXT producer; + WITH s AS inner { + print(inner.message); + } + }; + + NEXT consumer; + END + CLEAR + + expect { transpile(src) }.not_to raise_error + end + + it "allows footguns/07-style relay BG to NEXT a producer promise captured from the same scope" do + pending("MIR capture classification currently refuses captured Promise handles as unclassified_capture") + src = <<~CLEAR + STRUCT Payload { + data: String + } + + FN main() RETURNS Void -> + result = Payload{ data: "" } @shared:locked; + + producer = BG { + WITH EXCLUSIVE result AS r { + r.data = "important result"; + } + }; + + relay = BG { + NEXT producer; + }; + + consumer = BG { + NEXT relay; + WITH result AS r { + print("consumer saw: " + r.data); + } + }; + + NEXT consumer; + END + CLEAR + + expect { transpile(src) }.not_to raise_error + end + end + # =========================================================================== # @list allocator selection # =========================================================================== diff --git a/src/ast/schemas.rb b/src/ast/schemas.rb index cebfde692..4d8c1ab79 100644 --- a/src/ast/schemas.rb +++ b/src/ast/schemas.rb @@ -105,9 +105,10 @@ def initialize(fields: {}, field_defaults: nil, borrowed_fields: nil, type_param sig { params(schema: T.untyped).returns(T.nilable(Schemas::StructSchema)) } def self.as_struct_schema(schema) return schema if schema.is_a?(StructSchema) - return nil unless schema.is_a?(Hash) && !schema[:kind] + return nil unless schema.is_a?(Hash) && (!schema[:kind] || schema[:kind] == :resource) + fields = schema[:fields] || schema.reject { |k, _| k.is_a?(Symbol) } StructSchema.new( - fields: schema.reject { |k, _| k.is_a?(Symbol) }, + fields: fields, field_defaults: schema[:field_defaults], borrowed_fields: schema[:borrowed_fields], type_params: schema[:type_params], @@ -136,7 +137,7 @@ def self.as_resource_schema(schema) ResourceSchema.new( close_zig: schema[:close_zig], static_methods: schema[:static_methods] || {}, - fields: schema[:fields] || {}, + fields: schema[:fields] || schema.reject { |k, _| k.is_a?(Symbol) }, type_params: schema[:type_params], extern_module: schema[:extern_module], as_type: schema[:as_type], From 1599bfb16875f17a8bcf1dfd8f19e604be438f72 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 19:05:45 +0000 Subject: [PATCH 02/21] fix(mir): heap-promote escaping loop locals --- spec/loop_frame_analysis_spec.rb | 25 +++++++++++++++++++++++++ src/mir/control_flow.rb | 23 +++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/spec/loop_frame_analysis_spec.rb b/spec/loop_frame_analysis_spec.rb index 614268da2..d8d60246a 100644 --- a/spec/loop_frame_analysis_spec.rb +++ b/spec/loop_frame_analysis_spec.rb @@ -514,6 +514,31 @@ def main_fn(ast) expect(zig).to include("restoreLoopMark") end + it "WhileLoop heap-promotes a loop-local string that escapes into an outer list" do + src = <<~CLEAR + FN isCommand(ch: String) RETURNS Bool -> + RETURN ch == ">" || ch == "<"; + END + + FN commands(program: String) RETURNS !String -> + MUTABLE parts: String[]@list = []; + MUTABLE i = 0; + WHILE i < program.length() DO + ch = program.charAt(i); + IF isCommand(ch) THEN + parts.append(ch); + END + i += 1; + END + RETURN parts.join(""); + END + CLEAR + + zig = nil + expect { zig = transpile(src) }.not_to raise_error + expect(zig).to include("heapAlloc") + end + it "ForRange emits saveLoopMark + defer restoreLoopMark for loop-local list" do src = <<~CLEAR FN main() RETURNS Void -> diff --git a/src/mir/control_flow.rb b/src/mir/control_flow.rb index f81b21c36..578794144 100644 --- a/src/mir/control_flow.rb +++ b/src/mir/control_flow.rb @@ -1295,10 +1295,14 @@ def self.process_loop!(loop_node, body) local_names = collect_local_names(body) - # Find frame-allocated local VarDecls that don't escape into outer containers. - non_escaping = local_frame_decls(body, local_names).reject do |decl| + # Find frame-allocated local VarDecls. Values that escape into an outer + # container cannot be protected by a per-iteration rewind, since that would + # invalidate the stored pointer. Promote those declarations to heap instead. + frame_decls = local_frame_decls(body, local_names) + escaping, non_escaping = frame_decls.partition do |decl| escapes_to_outer?(decl.name.to_s, body, local_names) end + escaping.each { |decl| promote_decl_to_heap!(decl) } loop_node.mark_per_iter = non_escaping.any? @@ -1549,6 +1553,21 @@ def self.promote_to_heap!(ident_node) end end + # Promote a frame-allocated declaration whose value escapes this loop. + sig { params(decl_node: T.untyped).returns(T.untyped) } + def self.promote_decl_to_heap!(decl_node) + decl_ti = Type.from_node(decl_node) + return unless decl_ti.is_a?(Type) + return unless decl_ti.list_collection? || decl_ti.map? || decl_ti.array? || decl_ti.string? + + decl_ti.provenance = :heap + decl_node.storage = :heap if decl_node.respond_to?(:storage=) + + value = decl_node.respond_to?(:value) ? decl_node.value : nil + promote_value_to_heap!(value) if value + value.storage = :heap if value && value.respond_to?(:storage=) + end + # Walk DIRECT body: yield each stmt, recurse into if/match/with but STOP at # nested loops and function definitions. sig { params(body: Array, block: T.untyped).returns(T.untyped) } From d80e653971191e08ab67358ac669f052748d6980 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 19:12:14 +0000 Subject: [PATCH 03/21] fix(mir): narrow loop escape promotion to strings --- spec/loop_frame_analysis_spec.rb | 3 ++- src/mir/control_flow.rb | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spec/loop_frame_analysis_spec.rb b/spec/loop_frame_analysis_spec.rb index d8d60246a..197404c33 100644 --- a/spec/loop_frame_analysis_spec.rb +++ b/spec/loop_frame_analysis_spec.rb @@ -536,7 +536,8 @@ def main_fn(ast) zig = nil expect { zig = transpile(src) }.not_to raise_error - expect(zig).to include("heapAlloc") + expect(zig).to include("charAtCodepoint(rt.heapAlloc()") + expect(zig).not_to include("saveLoopMark") end it "ForRange emits saveLoopMark + defer restoreLoopMark for loop-local list" do diff --git a/src/mir/control_flow.rb b/src/mir/control_flow.rb index 578794144..923c582a9 100644 --- a/src/mir/control_flow.rb +++ b/src/mir/control_flow.rb @@ -1295,7 +1295,7 @@ def self.process_loop!(loop_node, body) local_names = collect_local_names(body) - # Find frame-allocated local VarDecls. Values that escape into an outer + # Find frame-allocated local VarDecls. Strings that escape into an outer # container cannot be protected by a per-iteration rewind, since that would # invalidate the stored pointer. Promote those declarations to heap instead. frame_decls = local_frame_decls(body, local_names) @@ -1553,12 +1553,12 @@ def self.promote_to_heap!(ident_node) end end - # Promote a frame-allocated declaration whose value escapes this loop. + # Promote a frame-allocated string declaration whose value escapes this loop. sig { params(decl_node: T.untyped).returns(T.untyped) } def self.promote_decl_to_heap!(decl_node) decl_ti = Type.from_node(decl_node) return unless decl_ti.is_a?(Type) - return unless decl_ti.list_collection? || decl_ti.map? || decl_ti.array? || decl_ti.string? + return unless decl_ti.string? decl_ti.provenance = :heap decl_node.storage = :heap if decl_node.respond_to?(:storage=) From 9fa219262e4297cf945320bbd80e2f9f15340c80 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 19:16:24 +0000 Subject: [PATCH 04/21] fix(mir): cover escaping frame collections in loops --- spec/loop_frame_analysis_spec.rb | 64 ++++++++++++++++++++++++++++++++ src/mir/control_flow.rb | 13 ++++--- 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/spec/loop_frame_analysis_spec.rb b/spec/loop_frame_analysis_spec.rb index 197404c33..c4acc5766 100644 --- a/spec/loop_frame_analysis_spec.rb +++ b/spec/loop_frame_analysis_spec.rb @@ -540,6 +540,70 @@ def main_fn(ast) expect(zig).not_to include("saveLoopMark") end + it "WhileLoop heap-promotes a loop-local list that escapes into an outer list" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE outer: Int64[][]@list = []; + MUTABLE i = 0; + WHILE i < 1 DO + MUTABLE inner: Int64[]@list = []; + inner.append(i); + outer.append(inner); + i += 1; + END + RETURN; + END + CLEAR + + zig = nil + expect { zig = transpile(src) }.not_to raise_error + expect(zig).to include("inner.append(rt.heapAlloc()") + expect(zig).to include("inner_moved = true") + expect(zig).not_to include("saveLoopMark") + end + + it "WhileLoop heap-promotes a loop-local dynamic array that escapes into an outer list" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE outer: Int64[][]@list = []; + MUTABLE i = 0; + WHILE i < 1 DO + inner: Int64[] = [i, i + 1]; + outer.append(inner); + i += 1; + END + RETURN; + END + CLEAR + + zig = nil + expect { zig = transpile(src) }.not_to raise_error + expect(zig).to include("rt.heapAlloc()") + expect(zig).not_to include("saveLoopMark") + end + + it "WhileLoop keeps an escaping loop-local map on heap without loop marks" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE outer: HashMap[]@list = []; + MUTABLE i = 0; + WHILE i < 1 DO + MUTABLE m: HashMap = {}; + m["x"] = i; + outer.append(m); + i += 1; + END + RETURN; + END + CLEAR + + zig = nil + expect { zig = transpile(src) }.not_to raise_error + expect(zig).to include("StringMap") + expect(zig).to include("rt.heapAlloc()") + expect(zig).not_to include("saveLoopMark") + end + it "ForRange emits saveLoopMark + defer restoreLoopMark for loop-local list" do src = <<~CLEAR FN main() RETURNS Void -> diff --git a/src/mir/control_flow.rb b/src/mir/control_flow.rb index 923c582a9..c56fab55e 100644 --- a/src/mir/control_flow.rb +++ b/src/mir/control_flow.rb @@ -1295,7 +1295,7 @@ def self.process_loop!(loop_node, body) local_names = collect_local_names(body) - # Find frame-allocated local VarDecls. Strings that escape into an outer + # Find frame-allocated local VarDecls. Values that escape into an outer # container cannot be protected by a per-iteration rewind, since that would # invalidate the stored pointer. Promote those declarations to heap instead. frame_decls = local_frame_decls(body, local_names) @@ -1349,7 +1349,8 @@ def self.collect_local_names(body) # (location-based) because lists/strings annotated with @list have provenance=:frame # but location=nil (their storage field stays :stack after finalize_storage!). # Only includes types that actually make frame-arena allocations (collections, - # strings) -- primitives like Int64 are excluded even when frame_provenance? is set. + # arrays, strings) -- primitives like Int64 are excluded even when + # frame_provenance? is set. sig { params(body: Array, _local_names: T::Set[String]).returns(Array) } def self.local_frame_decls(body, _local_names) decls = [] @@ -1358,12 +1359,12 @@ def self.local_frame_decls(body, _local_names) when AST::VarDecl ti = Type.from_node(s) next unless ti - is_frame = ti.frame_provenance? && (ti.list_collection? || ti.map? || ti.string?) + is_frame = ti.frame_provenance? && (ti.list_collection? || ti.map? || ti.array? || ti.string?) decls << s if is_frame && s.name.is_a?(String) when AST::BindExpr ti = Type.from_node(s) next unless ti - is_frame = ti.frame_provenance? && (ti.list_collection? || ti.map? || ti.string?) + is_frame = ti.frame_provenance? && (ti.list_collection? || ti.map? || ti.array? || ti.string?) decls << s if s.mode == :decl && is_frame && s.name.is_a?(String) end end @@ -1553,12 +1554,12 @@ def self.promote_to_heap!(ident_node) end end - # Promote a frame-allocated string declaration whose value escapes this loop. + # Promote a frame-allocated declaration whose value escapes this loop. sig { params(decl_node: T.untyped).returns(T.untyped) } def self.promote_decl_to_heap!(decl_node) decl_ti = Type.from_node(decl_node) return unless decl_ti.is_a?(Type) - return unless decl_ti.string? + return unless decl_ti.list_collection? || decl_ti.map? || decl_ti.array? || decl_ti.string? decl_ti.provenance = :heap decl_node.storage = :heap if decl_node.respond_to?(:storage=) From 44ca8429a1f75d76ba2087a4f26920ca92dbd7de Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 19:20:53 +0000 Subject: [PATCH 05/21] fix(examples): update footgun demos for current effects --- examples/footguns/08_buffer_overflow/main.cht | 8 +++---- examples/footguns/12_deadlock/main.cht | 22 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/footguns/08_buffer_overflow/main.cht b/examples/footguns/08_buffer_overflow/main.cht index 51cf21b6d..831618df1 100644 --- a/examples/footguns/08_buffer_overflow/main.cht +++ b/examples/footguns/08_buffer_overflow/main.cht @@ -18,7 +18,7 @@ STRUCT ByteBuffer { data: Int64[] } -FN safe_access() RETURNS Void -> +FN safe_access() RETURNS !Void -> MUTABLE buf: Int64[] = [0, 0, 0, 0]; # 4-element list @@ -36,7 +36,7 @@ FN safe_access() RETURNS Void -> print("buf[3]=" + buf[3].toString()); END -FN off_by_one() RETURNS Void -> +FN off_by_one() RETURNS !Void -> arr: Int64[] = [0, 1, 2, 3, 4, 5, 6, 7]; # 8 elements (indices 0-7) @@ -49,8 +49,8 @@ FN off_by_one() RETURNS Void -> END FN main() RETURNS Void -> - safe_access(); - off_by_one(); + safe_access() OR RAISE; + off_by_one() OR RAISE; END # Summary: diff --git a/examples/footguns/12_deadlock/main.cht b/examples/footguns/12_deadlock/main.cht index 36bc9096d..bd66d87af 100644 --- a/examples/footguns/12_deadlock/main.cht +++ b/examples/footguns/12_deadlock/main.cht @@ -74,7 +74,7 @@ STRUCT Account { # ------------------------------------------------------------------------- # CORRECT: Resolve promises before acquiring locks # ------------------------------------------------------------------------- -FN correct_next_before_lock() RETURNS Void -> +FN correct_next_before_lock() RETURNS !Void -> s = State{ value: 0 } @locked; producer = BG { @@ -94,18 +94,18 @@ END # ------------------------------------------------------------------------- # CORRECT: Pass already-locked value to helpers (avoid re-entrancy) # ------------------------------------------------------------------------- -FN update_value(inner: State) RETURNS Void -> - # Receives the already-unlocked struct value — no lock needed. - inner.value = 2; +FN updated_value(inner: State) RETURNS Int64 -> + # Receives the already-locked struct value — no lock needed. + RETURN 2; END -FN correct_no_reentrant() RETURNS Void -> +FN correct_no_reentrant() RETURNS !Void -> s = State{ value: 0 } @locked; t = BG { WITH EXCLUSIVE s AS inner { inner.value = 1; - update_value(inner); # pass the value, not the lock + inner.value = updated_value(inner); # pass the value, not the lock } }; @@ -119,12 +119,12 @@ END # ------------------------------------------------------------------------- # CORRECT: Sequential locks — never hold two at once # ------------------------------------------------------------------------- -FN sequential_transfer() RETURNS Void -> +FN sequential_transfer() RETURNS !Void -> a = Account{ balance: 100 } @locked; b = Account{ balance: 200 } @locked; t = BG { - amount = 0; + MUTABLE amount = 0; WITH EXCLUSIVE a AS ra { amount = ra.balance / 2; ra.balance = ra.balance - amount; @@ -143,13 +143,13 @@ END FN main() RETURNS Void -> print("--- correct: NEXT before lock ---"); - correct_next_before_lock(); + correct_next_before_lock() OR RAISE; print("--- correct: no re-entrancy ---"); - correct_no_reentrant(); + correct_no_reentrant() OR RAISE; print("--- correct: sequential locks ---"); - sequential_transfer(); + sequential_transfer() OR RAISE; END # Summary: From ce525d5a9e3dfa1b242547c45b6ed166438b0bf7 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 19:48:22 +0000 Subject: [PATCH 06/21] fix(transpiler): pass mutable value params by reference --- examples/footguns/12_deadlock/main.cht | 6 ++-- src/annotator-helpers/function_analysis.rb | 12 +++---- src/annotator.rb | 1 + src/ast/symbol_entry.rb | 2 ++ src/mir/mir_lowering.rb | 31 ++++++++++++++----- .../382_mutable_param_updates_caller.cht | 19 ++++++++++++ 6 files changed, 52 insertions(+), 19 deletions(-) create mode 100644 transpile-tests/382_mutable_param_updates_caller.cht diff --git a/examples/footguns/12_deadlock/main.cht b/examples/footguns/12_deadlock/main.cht index bd66d87af..ebc26c1c8 100644 --- a/examples/footguns/12_deadlock/main.cht +++ b/examples/footguns/12_deadlock/main.cht @@ -94,9 +94,9 @@ END # ------------------------------------------------------------------------- # CORRECT: Pass already-locked value to helpers (avoid re-entrancy) # ------------------------------------------------------------------------- -FN updated_value(inner: State) RETURNS Int64 -> +FN update_value!(MUTABLE inner: State) RETURNS Void -> # Receives the already-locked struct value — no lock needed. - RETURN 2; + inner.value = 2; END FN correct_no_reentrant() RETURNS !Void -> @@ -105,7 +105,7 @@ FN correct_no_reentrant() RETURNS !Void -> t = BG { WITH EXCLUSIVE s AS inner { inner.value = 1; - inner.value = updated_value(inner); # pass the value, not the lock + update_value!(inner); # pass the value, not the lock } }; diff --git a/src/annotator-helpers/function_analysis.rb b/src/annotator-helpers/function_analysis.rb index 9dafd58ba..e5e96aadc 100644 --- a/src/annotator-helpers/function_analysis.rb +++ b/src/annotator-helpers/function_analysis.rb @@ -401,14 +401,10 @@ def verify_function_signature!(node, signature) # post-annotation passes like the GUARD MUTABLE-mutation check # (validate_with_guard_no_body_mutation!) need to see this. # - # Critically, we mark ONLY entry.mutated, NOT - # decl_node.var_mutated. The latter drives the var/const emit - # decision for the Zig-level binding, and at the Zig level the - # call site doesn't visibly mutate the local — Zig's - # "var-never-mutated" safety check would fire if we promoted - # the binding to `var` here. The "MUTABLE never reassigned" - # lint also reads decl_node.var_mutated; keeping that path - # untouched preserves existing lint behavior. + # Critically, we mark only SymbolEntry state, not + # decl_node.var_mutated. The declaration still should not count + # as locally reassigned for lints, but lowering must emit Zig + # `var` storage so the call site can pass `&binding` as `*T`. if arg_node.is_a?(AST::Identifier) mark_var_mutated_via_call(arg_node.name) end diff --git a/src/annotator.rb b/src/annotator.rb index a99cbab54..e2877eb0d 100644 --- a/src/annotator.rb +++ b/src/annotator.rb @@ -3256,6 +3256,7 @@ def mark_var_mutated_via_call(name) entry = scope.locals[name] return unless entry entry.mutated = true + entry.mutable_ref_target = true if entry.respond_to?(:mutable_ref_target=) end # Walk a chained access expression (GetField/GetIndex chain rooted at an diff --git a/src/ast/symbol_entry.rb b/src/ast/symbol_entry.rb index 52027d3ca..5eb990f3a 100644 --- a/src/ast/symbol_entry.rb +++ b/src/ast/symbol_entry.rb @@ -96,6 +96,8 @@ class SymbolEntry :borrowed_alias, # true only for BORROWED/RESTRICT aliases — fiber capture is stack-UAF :sync_families, # Set of families when bound by REQUIRES disjunction (ATOMICS M1.6.5) :layout, # AtomicPtr M3.1: nil | :indirect — heap-pinned cell with stable address + :mutable_ref_target, # This binding is passed to a MUTABLE parameter by reference. + # Forces Zig `var` storage so &binding yields *T, not *const T. :poly_borrow_target # True-Sync-Polymorphism Gate 3: this binding has its address taken # at a universally-polymorphic call site. Forces the var_decl # to emit `var` (mutable Zig storage) so &binding yields *T, not diff --git a/src/mir/mir_lowering.rb b/src/mir/mir_lowering.rb index 11434e163..76fdd4328 100644 --- a/src/mir/mir_lowering.rb +++ b/src/mir/mir_lowering.rb @@ -1085,13 +1085,15 @@ def lower_function_def(node) else transpile_type(param[:type], is_param: true) end + zig_t = "*#{zig_t}" if mutable_scalar_params.include?(param[:name]) && zig_t != "anytype" # `pointer_passed`: this param's receiver is a pointer-to-T at the # Zig level, so allocations made inside this function on its behalf # outlive the function. Mirrors `@current_fn_collection_params`'s # criteria so the MIR checker can independently verify the # allocator-routing decision (see INV-CROSS-FRAME-PARAM-ALLOC). pointer_passed = p_type_obj.needs_pointer_passing? || - (param[:mutable] && p_type_obj.list_collection?) + (param[:mutable] && p_type_obj.list_collection?) || + mutable_scalar_params.include?(param[:name]) MIR::Param.new(p_name, zig_t, pointer_passed) } @@ -1242,7 +1244,11 @@ def lower_function_def(node) # Mutable scalar param shadows mutable_scalar_params.each do |name| next unless used_names.include?(name) - prologue << MIR::Let.new(name, MIR::Ident.new("_m_#{name}"), true, nil, "_ = &#{name};") + ptr_name = "_m_#{name}" + prologue << MIR::Let.new(name, MIR::Deref.new(MIR::Ident.new(ptr_name)), true, nil, "_ = &#{name};") + prologue << MIR::DeferStmt.new(MIR::ScopeBlock.new([ + MIR::Set.new(MIR::Deref.new(MIR::Ident.new(ptr_name)), MIR::Ident.new(name)) + ])) end # Emit AllocMark + Cleanup for TAKES parameters (replaces insert_takes_drops! from MIRPass). @@ -1613,6 +1619,13 @@ def lower_func_call(node) callee_param && callee_param[:mutable] && callee_param[:type].respond_to?(:list_collection?) && callee_param[:type].list_collection? + callee_param_type = if callee_param + callee_param[:type].is_a?(Type) ? callee_param[:type] : (Type.new(callee_param[:type] || :Any) rescue nil) + end + callee_wants_mutable_value = + callee_param && callee_param[:mutable] && a.is_a?(AST::Identifier) && + !callee_wants_mutable_list && + !(callee_param_type&.respond_to?(:needs_pointer_passing?) && callee_param_type.needs_pointer_passing?) if callee_wants_mutable_list && a.is_a?(AST::Identifier) if @current_fn_collection_params&.include?(a.name) || @current_bg_pointer_captures&.include?(a.name) @@ -1620,6 +1633,8 @@ def lower_func_call(node) else MIR::AddressOf.new(arg) end + elsif callee_wants_mutable_value + MIR::AddressOf.new(arg) elsif ti&.array? && !ti&.string? && !ti&.pool? && !a.is_a?(AST::CopyNode) && !a.is_a?(AST::MoveNode) MIR::ItemsAccess.new(arg, true) elsif ti.is_a?(Type) && Type.new(ti).needs_pointer_passing? @@ -5942,7 +5957,7 @@ def lower_var_decl(node) # flip it to `const`. Without this, &binding produces *const T, # which doesn't unify with the body's `*T` parameter and the # mutation never reaches the caller. - is_mutable = true if node.symbol&.poly_borrow_target + is_mutable = true if node.symbol&.poly_borrow_target || node.symbol&.mutable_ref_target # Post-dataflow cleanup entry (cleanup_decisions! refinements are correct here). # For same-name vars in different scopes, alloc is overridden per-declaration @@ -5956,15 +5971,15 @@ def lower_var_decl(node) ft.open_stream? || ft.inf_stream? || (ft.array? && ft.dynamic?) || ft.heap_provenance? || ft.resource? || node.resource_close_zig forced_var = is_mutable && has_mutable_cleanup - # True-Sync-Polymorphism Gate 3: a binding whose address is taken - # at a universal-poly call site MUST emit Zig `var` so &binding is + # A binding whose address is taken by a MUTABLE param or universal-poly + # call site MUST emit Zig `var` so &binding is # *T (not *const T). The local-mutation analyzer would otherwise # downgrade this to `const` when the binding is only "field-mutated" - # via the polymorphic body -- which is invisible to var_mutated. - poly_borrow = node.symbol&.poly_borrow_target == true + # via the callee body -- which is invisible to var_mutated. + by_ref_borrow = node.symbol&.mutable_ref_target == true || node.symbol&.poly_borrow_target == true keyword_mutable = if !is_mutable false - elsif actually_mutated || forced_var || poly_borrow + elsif actually_mutated || forced_var || by_ref_borrow true else false diff --git a/transpile-tests/382_mutable_param_updates_caller.cht b/transpile-tests/382_mutable_param_updates_caller.cht new file mode 100644 index 000000000..a4f828916 --- /dev/null +++ b/transpile-tests/382_mutable_param_updates_caller.cht @@ -0,0 +1,19 @@ +STRUCT Box { value: Int64 } + +FN bump!(MUTABLE x: Int64) RETURNS Void -> + x = x + 1; +END + +FN set_box!(MUTABLE b: Box) RETURNS Void -> + b.value = 7; +END + +FN main() RETURNS Void -> + MUTABLE n = 5; + bump!(n); + ASSERT n == 6, "MUTABLE primitive param updates caller binding"; + + MUTABLE box = Box{ value: 1 }; + set_box!(box); + ASSERT box.value == 7, "MUTABLE struct param updates caller binding"; +END From 33a2fb4bc27bf8c4a8f2511d7423f54d070923e2 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sat, 9 May 2026 20:02:02 +0000 Subject: [PATCH 07/21] fix(examples): make MAL compile with current semantics Modernizes the MAL example for current CLEAR syntax, fallible-return discipline, ownership copies, and pool env access patterns. This is compile-only: the interpreter still needs follow-up work before its runtime assertions pass correctly. --- examples/mal/interpreter.cht | 445 ++++++++++++++++++----------------- 1 file changed, 226 insertions(+), 219 deletions(-) diff --git a/examples/mal/interpreter.cht b/examples/mal/interpreter.cht index 47494f437..d3fc0d327 100644 --- a/examples/mal/interpreter.cht +++ b/examples/mal/interpreter.cht @@ -1,10 +1,10 @@ --- Mal (Make-a-Lisp) Interpreter in CLEAR — Pool-based edition --- --- Uses Env[50000]@pool for scoped environments instead of a flat HashMap. --- Each Env holds a HashMap for its local bindings. --- Parent links stored as Value.EnvRef in vars["__p"]. --- Lambda closures capture envId: Id directly; body stored as Value @indirect. --- Parser uses HashMap with numeric keys (avoids frame-arena string issues). +# Mal (Make-a-Lisp) Interpreter in CLEAR - Pool-based edition +# +# Uses Env[50000]@pool for scoped environments instead of a flat HashMap. +# Each Env holds a HashMap for its local bindings. +# Parent links stored as Value.EnvRef in vars["__p"]. +# Lambda closures capture envId: Id directly; body stored as Value @indirect. +# Parser uses HashMap with numeric keys (avoids frame-arena string issues). STRUCT Env { vars: HashMap @@ -23,7 +23,7 @@ UNION Value { Error { errMsg: String, errKind: String } } --- Pure helpers +# Pure helpers FN boolVal(b: Bool) RETURNS Value -> IF b -> RETURN Value.TrueVal; @@ -31,54 +31,54 @@ FN boolVal(b: Bool) RETURNS Value -> END FN isTruthy?(v: Value) RETURNS Bool -> - MATCH v START Value.Nil -> RETURN FALSE;, Value.FalseVal -> RETURN FALSE;, DEFAULT -> RETURN TRUE; END + PARTIAL MATCH v START Value.Nil -> RETURN FALSE;, Value.FalseVal -> RETURN FALSE;, DEFAULT -> RETURN TRUE; END RETURN TRUE; END FN getSymName(v: Value) RETURNS String -> - MATCH v START Value.Symbol AS s -> RETURN s;, DEFAULT -> RETURN ""; END + PARTIAL MATCH v START Value.Symbol AS s -> RETURN s;, DEFAULT -> RETURN ""; END RETURN ""; END FN getNum(v: Value) RETURNS Float64 -> - MATCH v START Value.Number AS n -> RETURN n;, DEFAULT -> RETURN 0.0; END + PARTIAL MATCH v START Value.Number AS n -> RETURN n;, DEFAULT -> RETURN 0.0; END RETURN 0.0; END FN getStr(v: Value) RETURNS String -> - MATCH v START Value.Str AS s -> RETURN s;, DEFAULT -> RETURN ""; END + PARTIAL MATCH v START Value.Str AS s -> RETURN s;, DEFAULT -> RETURN ""; END RETURN ""; END FN getNativeId(v: Value) RETURNS Int64 -> - MATCH v START Value.NativeFn AS id -> RETURN id;, DEFAULT -> RETURN 0; END + PARTIAL MATCH v START Value.NativeFn AS id -> RETURN id;, DEFAULT -> RETURN 0; END RETURN 0; END FN isList?(v: Value) RETURNS Bool -> - MATCH v START Value.List -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.List -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END FN listLen(v: Value) RETURNS Int64 -> - MATCH v START Value.List AS items -> RETURN items.length();, DEFAULT -> RETURN 0; END + PARTIAL MATCH v START Value.List AS items -> RETURN items.length();, DEFAULT -> RETURN 0; END RETURN 0; END FN isLambda?(v: Value) RETURNS Bool -> - MATCH v START Value.Lambda -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.Lambda -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END -FN valEqual?(a: Value, b: Value) RETURNS Bool @reentrant -> - MATCH a START - Value.Nil -> MATCH b START Value.Nil -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, - Value.TrueVal -> MATCH b START Value.TrueVal -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, - Value.FalseVal -> MATCH b START Value.FalseVal -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, - Value.Number AS na -> MATCH b START Value.Number AS nb -> RETURN na == nb;, DEFAULT -> RETURN FALSE; END, - Value.Str AS sa -> MATCH b START Value.Str AS sb -> RETURN sa == sb;, DEFAULT -> RETURN FALSE; END, - Value.Symbol AS sa -> MATCH b START Value.Symbol AS sb -> RETURN sa == sb;, DEFAULT -> RETURN FALSE; END, - Value.List AS la -> MATCH b START +FN valEqual?(a: Value, b: Value) RETURNS Bool EFFECTS REENTRANT -> + PARTIAL MATCH a START + Value.Nil -> PARTIAL MATCH b START Value.Nil -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, + Value.TrueVal -> PARTIAL MATCH b START Value.TrueVal -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, + Value.FalseVal -> PARTIAL MATCH b START Value.FalseVal -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END, + Value.Number AS na -> PARTIAL MATCH b START Value.Number AS nb -> RETURN na == nb;, DEFAULT -> RETURN FALSE; END, + Value.Str AS sa -> PARTIAL MATCH b START Value.Str AS sb -> RETURN sa == sb;, DEFAULT -> RETURN FALSE; END, + Value.Symbol AS sa -> PARTIAL MATCH b START Value.Symbol AS sb -> RETURN sa == sb;, DEFAULT -> RETURN FALSE; END, + Value.List AS la -> PARTIAL MATCH b START Value.List AS lb -> IF la.length() != lb.length() THEN RETURN FALSE; END FOR ci IN (0_i64 ..< la.length()) DO @@ -86,7 +86,7 @@ FN valEqual?(a: Value, b: Value) RETURNS Bool @reentrant -> END RETURN TRUE;, DEFAULT -> RETURN FALSE; END, - Value.Vector AS va -> MATCH b START + Value.Vector AS va -> PARTIAL MATCH b START Value.Vector AS vb -> IF va.length() != vb.length() THEN RETURN FALSE; END FOR vi IN (0_i64 ..< va.length()) DO @@ -94,7 +94,7 @@ FN valEqual?(a: Value, b: Value) RETURNS Bool @reentrant -> END RETURN TRUE;, DEFAULT -> RETURN FALSE; END, - Value.Pair AS pa -> MATCH b START + Value.Pair AS pa -> PARTIAL MATCH b START Value.Pair AS pb -> RETURN valEqual?(pa.pairCar, pb.pairCar) && valEqual?(pa.pairCdr, pb.pairCdr);, DEFAULT -> RETURN FALSE; END, @@ -103,19 +103,19 @@ FN valEqual?(a: Value, b: Value) RETURNS Bool @reentrant -> RETURN FALSE; END -FN readAtom(token: String) RETURNS Value -> +FN readAtom(token: String) RETURNS !Value -> IF token.length() == 0 THEN RETURN Value.Nil; END IF token == "nil" THEN RETURN Value.Nil; END IF token == "true" THEN RETURN Value.TrueVal; END IF token == "false" THEN RETURN Value.FalseVal; END - IF charAt(token, 0) == "\"" THEN RETURN Value{ Str: substr(token, 1, token.length() - 2) }; END + IF charAt(token, 0) == "\"" THEN RETURN Value{ Str: COPY substr(token, 1, token.length() - 2) }; END n = toNumber(token) OR (0.0 - 999999.0); IF n != 0.0 - 999999.0 THEN RETURN Value{ Number: n }; END - RETURN Value{ Symbol: token }; + RETURN Value{ Symbol: COPY token }; END -FN prStr(v: Value, readably: Bool) RETURNS String @reentrant -> - MATCH v START +FN prStr(v: Value, readably: Bool) RETURNS !String EFFECTS REENTRANT -> + PARTIAL MATCH v START Value.Nil -> RETURN "nil";, Value.TrueVal -> RETURN "true";, Value.FalseVal -> RETURN "false";, @@ -149,19 +149,19 @@ FN prStr(v: Value, readably: Bool) RETURNS String @reentrant -> RETURN ""; END --- Parser: tokenize into HashMap with numeric position tracking. --- Tokens stored as Value.Str at keys "__t0", "__t1", etc. --- Position and count stored as Value.Number at "__rp" and "__tc". --- This avoids toString() inside WHILE loops for position updates. +# Parser: tokenize into HashMap with numeric position tracking. +# Tokens stored as Value.Str at keys "__t0", "__t1", etc. +# Position and count stored as Value.Number at "__rp" and "__tc". +# This avoids toString() inside WHILE loops for position updates. -FN tokenizeToEnv!(MUTABLE penv: HashMap, str: String) RETURNS Void -> +FN tokenizeToEnv!(MUTABLE penv: HashMap, str: String) RETURNS !Void -> MUTABLE count: Int64 = 0; MUTABLE i: Int64 = 0; WHILE i < str.length() DO c = charAt(str, i); IF c == " " || c == "," || c == "\n" || c == "\t" THEN i += 1; ELSE_IF c == "(" || c == ")" || c == "[" || c == "]" THEN - penv["__t" + count.toString()] = Value{ Str: c }; + penv["__t" + count.toString()] = Value{ Str: COPY c }; count += 1; i += 1; ELSE_IF c == ";" THEN @@ -176,7 +176,7 @@ FN tokenizeToEnv!(MUTABLE penv: HashMap, str: String) RETURNS Void -> END END s = s + "\""; - penv["__t" + count.toString()] = Value{ Str: s }; + penv["__t" + count.toString()] = Value{ Str: COPY s }; count += 1; i += 1; ELSE @@ -185,7 +185,7 @@ FN tokenizeToEnv!(MUTABLE penv: HashMap, str: String) RETURNS Void -> s = s + charAt(str, i); i += 1; END IF s.length() > 0 THEN - penv["__t" + count.toString()] = Value{ Str: s }; + penv["__t" + count.toString()] = Value{ Str: COPY s }; count += 1; END END @@ -194,12 +194,12 @@ FN tokenizeToEnv!(MUTABLE penv: HashMap, str: String) RETURNS Void -> RETURN; END -FN getTokenStr!(MUTABLE penv: HashMap, idx: Int64) RETURNS String -> +FN getTokenStr!(MUTABLE penv: HashMap, idx: Int64) RETURNS !String -> val = penv["__t" + idx.toString()] OR Value.Nil; RETURN getStr(val); END -FN readFormEnv!(MUTABLE penv: HashMap) RETURNS Value @reentrant -> +FN readFormEnv!(MUTABLE penv: HashMap) RETURNS !Value EFFECTS REENTRANT -> posVal = penv["__rp"] OR Value{ Number: 0.0 }; tcVal = penv["__tc"] OR Value{ Number: 0.0 }; pos = toInt(getNum(posVal)); @@ -219,7 +219,7 @@ FN readFormEnv!(MUTABLE penv: HashMap) RETURNS Value @reentrant -> RETURN readAtom(tok); END -FN readListEnv!(MUTABLE penv: HashMap) RETURNS Value @reentrant -> +FN readListEnv!(MUTABLE penv: HashMap) RETURNS !Value EFFECTS REENTRANT -> MUTABLE items: Value[]@list = List[]; MUTABLE listDone = FALSE; WHILE listDone == FALSE DO @@ -242,27 +242,27 @@ FN readListEnv!(MUTABLE penv: HashMap) RETURNS Value @reentrant -> RETURN Value{ List: items }; END --- Native function dispatch by numeric ID. --- IDs: 1=+ 2=- 3=* 4=/ 5== 6=< 7=> 8=<= 9=>= --- 10=list 11=list? 12=empty? 13=count 14=not 15=prn --- 16=vector 17=vector-ref 18=vector-set! 19=vector-length 20=vector? --- 21=cons 22=car 23=cdr 24=pair? 25=eq? --- 26=string-append 27=string-length 28=substring 29=string-ref --- 30=number->string 31=string->number 32=string? 33=display +# Native function dispatch by numeric ID. +# IDs: 1=+ 2=- 3=* 4=/ 5== 6=< 7=> 8=<= 9=>= +# 10=list 11=list? 12=empty? 13=count 14=not 15=prn +# 16=vector 17=vector-ref 18=vector-set! 19=vector-length 20=vector? +# 21=cons 22=car 23=cdr 24=pair? 25=eq? +# 26=string-append 27=string-length 28=substring 29=string-ref +# 30=number->string 31=string->number 32=string? 33=display -FN applyNative(id: Int64, evaled: Value[]) RETURNS Value @reentrant -> - -- Arithmetic +FN applyNative(id: Int64, evaled: Value[]) RETURNS !Value EFFECTS REENTRANT -> + # Arithmetic IF id == 1 THEN RETURN Value{ Number: getNum(evaled[1]) + getNum(evaled[2]) }; END IF id == 2 THEN RETURN Value{ Number: getNum(evaled[1]) - getNum(evaled[2]) }; END IF id == 3 THEN RETURN Value{ Number: getNum(evaled[1]) * getNum(evaled[2]) }; END IF id == 4 THEN RETURN Value{ Number: getNum(evaled[1]) / getNum(evaled[2]) }; END - -- Comparison + # Comparison IF id == 5 THEN RETURN boolVal(valEqual?(evaled[1], evaled[2])); END IF id == 6 THEN RETURN boolVal(getNum(evaled[1]) < getNum(evaled[2])); END IF id == 7 THEN RETURN boolVal(getNum(evaled[1]) > getNum(evaled[2])); END IF id == 8 THEN RETURN boolVal(getNum(evaled[1]) <= getNum(evaled[2])); END IF id == 9 THEN RETURN boolVal(getNum(evaled[1]) >= getNum(evaled[2])); END - -- List + # List IF id == 10 THEN MUTABLE litems: Value[]@list = List[]; FOR li IN (1_i64 ..< evaled.length()) -> @@ -274,42 +274,42 @@ FN applyNative(id: Int64, evaled: Value[]) RETURNS Value @reentrant -> IF id == 13 THEN RETURN Value{ Number: toFloat(listLen(evaled[1])) }; END IF id == 14 THEN RETURN boolVal(isTruthy?(evaled[1]) == FALSE); END IF id == 15 THEN print(prStr(evaled[1], TRUE)); RETURN Value.Nil; END - -- Vector + # Vector IF id == 16 THEN MUTABLE velems: Value[]@list = List[]; FOR vi IN (1_i64 ..< evaled.length()) -> - velems.append(evaled[vi]); + velems.append(COPY evaled[vi]); RETURN Value{ Vector: velems }; END IF id == 17 THEN RETURN vecRef(evaled[1], toInt(getNum(evaled[2]))); END IF id == 18 THEN RETURN Value.Nil; END IF id == 19 THEN RETURN Value{ Number: toFloat(vecLen(evaled[1])) }; END IF id == 20 THEN RETURN boolVal(isVector?(evaled[1])); END - -- Pair - IF id == 21 THEN RETURN Value.Pair{ pairCar: evaled[1], pairCdr: evaled[2] }; END + # Pair + IF id == 21 THEN RETURN Value.Pair{ pairCar: COPY evaled[1], pairCdr: COPY evaled[2] }; END IF id == 22 THEN RETURN pairCar(evaled[1]); END IF id == 23 THEN RETURN pairCdr(evaled[1]); END IF id == 24 THEN RETURN boolVal(isPair?(evaled[1])); END IF id == 25 THEN RETURN boolVal(valEqual?(evaled[1], evaled[2])); END - -- String + # String IF id == 26 THEN MUTABLE out = getStr(evaled[1]); FOR si IN (2_i64 ..< evaled.length()) DO out = out + getStr(evaled[si]); END - RETURN Value{ Str: out }; + RETURN Value{ Str: COPY out }; END IF id == 27 THEN RETURN Value{ Number: toFloat(getStr(evaled[1]).length()) }; END IF id == 28 THEN s = getStr(evaled[1]); start = toInt(getNum(evaled[2])); end_ = toInt(getNum(evaled[3])); - RETURN Value{ Str: substr(s, start, end_ - start) }; + RETURN Value{ Str: COPY substr(s, start, end_ - start) }; END IF id == 29 THEN s = getStr(evaled[1]); idx = toInt(getNum(evaled[2])); - RETURN Value{ Str: charAt(s, idx) }; + RETURN Value{ Str: COPY charAt(s, idx) }; END IF id == 30 THEN n = getNum(evaled[1]); @@ -322,7 +322,7 @@ FN applyNative(id: Int64, evaled: Value[]) RETURNS Value @reentrant -> RETURN Value{ Number: parsed }; END IF id == 32 THEN - MATCH evaled[1] START Value.Str -> RETURN Value.TrueVal;, DEFAULT -> RETURN Value.FalseVal; END + PARTIAL MATCH evaled[1] START Value.Str -> RETURN Value.TrueVal;, DEFAULT -> RETURN Value.FalseVal; END RETURN Value.FalseVal; END IF id == 33 THEN print(prStr(evaled[1], FALSE)); RETURN Value.Nil; END @@ -330,65 +330,69 @@ FN applyNative(id: Int64, evaled: Value[]) RETURNS Value @reentrant -> RETURN Value.Nil; END --- Environment operations using Pool +# Environment operations using Pool -FN envGet!(envId: Id, name: String, MUTABLE pool: Env[50000]@pool) RETURNS Value @reentrant -> - val = pool[envId]?.vars[name] OR Value{ Number: 0.0 - 777777.0 }; - MATCH val START - Value.Number AS n -> - IF n == 0.0 - 777777.0 THEN - parentVal = pool[envId]?.vars["__p"] OR Value.Nil; - MATCH parentVal START - Value.EnvRef AS pid -> RETURN envGet!(pid, name, pool);, - DEFAULT -> RETURN Value.Nil; +FN envGet!(envId: Id, name: String, MUTABLE pool: Env[50000]@pool) RETURNS !Value EFFECTS REENTRANT -> + IF pool[envId] AS env THEN + val = env.vars[name] OR Value{ Number: 0.0 - 777777.0 }; + PARTIAL MATCH val START + Value.Number AS n -> + IF n == 0.0 - 777777.0 THEN + parentVal = env.vars["__p"] OR Value.Nil; + PARTIAL MATCH parentVal START + Value.EnvRef AS pid -> RETURN envGet!(pid, name, pool);, + DEFAULT -> RETURN Value.Nil; + END END - END - RETURN val;, - DEFAULT -> RETURN val; + RETURN COPY val;, + DEFAULT -> RETURN COPY val; + END END RETURN Value.Nil; END --- envSet!: walk scope chain, update existing binding. Returns TRUE if found. +# envSet!: walk scope chain, update existing binding. Returns TRUE if found. -FN envSet!(envId: Id, name: String, val: Value, MUTABLE pool: Env[50000]@pool) RETURNS Bool @reentrant -> - existing = pool[envId]?.vars[name] OR Value{ Number: 0.0 - 777777.0 }; - MATCH existing START - Value.Number AS n -> - IF n == 0.0 - 777777.0 THEN - parentVal = pool[envId]?.vars["__p"] OR Value.Nil; - MATCH parentVal START - Value.EnvRef AS pid -> RETURN envSet!(pid, name, val, pool);, - DEFAULT -> RETURN FALSE; +FN envSet!(envId: Id, name: String, val: Value, MUTABLE pool: Env[50000]@pool) RETURNS !Bool EFFECTS REENTRANT -> + IF pool[envId] AS env THEN + existing = env.vars[name] OR Value{ Number: 0.0 - 777777.0 }; + PARTIAL MATCH existing START + Value.Number AS n -> + IF n == 0.0 - 777777.0 THEN + parentVal = env.vars["__p"] OR Value.Nil; + PARTIAL MATCH parentVal START + Value.EnvRef AS pid -> RETURN envSet!(pid, name, val, pool);, + DEFAULT -> RETURN FALSE; + END END - END - pool[envId]?.vars[name] = val; - RETURN TRUE;, - DEFAULT -> - pool[envId]?.vars[name] = val; - RETURN TRUE; + env.vars[name] = COPY val; + RETURN TRUE;, + DEFAULT -> + env.vars[name] = COPY val; + RETURN TRUE; + END END RETURN FALSE; END --- eval: TCO trampoline loop. Tail positions (if branches, begin/do last expr, --- let body, lambda body) reassign ast/curEnv and continue instead of recursing. +# eval: TCO trampoline loop. Tail positions (if branches, begin/do last expr, +# let body, lambda body) reassign ast/curEnv and continue instead of recursing. FN isVector?(v: Value) RETURNS Bool -> - MATCH v START Value.Vector -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.Vector -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END FN vecLen(v: Value) RETURNS Int64 -> - MATCH v START Value.Vector AS elems -> RETURN elems.length();, DEFAULT -> RETURN 0; END + PARTIAL MATCH v START Value.Vector AS elems -> RETURN elems.length();, DEFAULT -> RETURN 0; END RETURN 0; END -FN vecRef(v: Value, idx: Int64) RETURNS Value -> - MATCH v START +FN vecRef(v: Value, idx: Int64) RETURNS !Value -> + PARTIAL MATCH v START Value.Vector AS elems -> elem = elems[idx]; - RETURN elem;, + RETURN COPY elem;, DEFAULT -> RETURN Value.Nil; END RETURN Value.Nil; @@ -396,76 +400,76 @@ END FN isPair?(v: Value) RETURNS Bool -> - MATCH v START Value.Pair -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.Pair -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END -FN pairCar(v: Value) RETURNS Value -> - MATCH v START +FN pairCar(v: Value) RETURNS !Value -> + PARTIAL MATCH v START Value.Pair AS p -> result = p.pairCar; - RETURN result;, + RETURN COPY result;, DEFAULT -> RETURN Value.Nil; END RETURN Value.Nil; END -FN pairCdr(v: Value) RETURNS Value -> - MATCH v START +FN pairCdr(v: Value) RETURNS !Value -> + PARTIAL MATCH v START Value.Pair AS p -> result = p.pairCdr; - RETURN result;, + RETURN COPY result;, DEFAULT -> RETURN Value.Nil; END RETURN Value.Nil; END FN isError?(v: Value) RETURNS Bool -> - MATCH v START Value.Error -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.Error -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END FN getErrMsg(v: Value) RETURNS String -> - MATCH v START Value.Error AS e -> RETURN e.errMsg;, DEFAULT -> RETURN ""; END + PARTIAL MATCH v START Value.Error AS e -> RETURN e.errMsg;, DEFAULT -> RETURN ""; END RETURN ""; END FN getErrKind(v: Value) RETURNS String -> - MATCH v START Value.Error AS e -> RETURN e.errKind;, DEFAULT -> RETURN ""; END + PARTIAL MATCH v START Value.Error AS e -> RETURN e.errKind;, DEFAULT -> RETURN ""; END RETURN ""; END -FN handleCatch!(catchExpr: Value, errMsg: String, errKind: String, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS Value -> - MATCH catchExpr START +FN handleCatch!(catchExpr: Value, errMsg: String, errKind: String, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS !Value -> + PARTIAL MATCH catchExpr START Value.List AS catchItems -> catchEnvId: Id = pool.insert(Env{ vars: {} }); - pool[catchEnvId]?.vars["__p"] = Value{ EnvRef: envId }; + IF pool[catchEnvId] AS env THEN env.vars["__p"] = Value{ EnvRef: envId }; END errBindName = getSymName(catchItems[1]); - pool[catchEnvId]?.vars[errBindName] = Value.Error{ errMsg: errMsg, errKind: errKind }; - RETURN Value.Tco{ tcoAst: catchItems[2], tcoEnv: catchEnvId };, - DEFAULT -> RETURN Value.Error{ errMsg: errMsg, errKind: errKind }; + IF pool[catchEnvId] AS env THEN env.vars[errBindName] = Value.Error{ errMsg: COPY errMsg, errKind: COPY errKind }; END + RETURN Value.Tco{ tcoAst: COPY catchItems[2], tcoEnv: catchEnvId };, + DEFAULT -> RETURN Value.Error{ errMsg: COPY errMsg, errKind: COPY errKind }; END - RETURN Value.Error{ errMsg: errMsg, errKind: errKind }; + RETURN Value.Error{ errMsg: COPY errMsg, errKind: COPY errKind }; END FN isSymbol?(v: Value) RETURNS Bool -> - MATCH v START Value.Symbol -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END + PARTIAL MATCH v START Value.Symbol -> RETURN TRUE;, DEFAULT -> RETURN FALSE; END RETURN FALSE; END --- eval: TCO trampoline. evalList! returns Value.Tco to signal tail call. +# eval: TCO trampoline. evalList! returns Value.Tco to signal tail call. -FN eval!(astIn: Value, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS Value @reentrant -> - MUTABLE ast: Value = astIn; +FN eval!(astIn: Value, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS !Value EFFECTS REENTRANT -> + MUTABLE ast: Value = COPY astIn; MUTABLE curEnv: Id = envId; MUTABLE tcoActive = TRUE; WHILE tcoActive DO - MATCH ast START + PARTIAL MATCH ast START Value.Symbol AS sym -> RETURN envGet!(curEnv, sym, pool);, Value.List AS listItems -> result = evalList!(listItems, curEnv, pool); - MATCH result START + PARTIAL MATCH result START Value.Tco AS tco -> ast = tco.tcoAst; curEnv = tco.tcoEnv;, @@ -477,72 +481,75 @@ FN eval!(astIn: Value, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS Va RETURN Value.Nil; END -FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS Value @reentrant -> +FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS !Value EFFECTS REENTRANT -> IF items.length() == 0 THEN RETURN Value.Nil; END formName = getSymName(items[0]); - -- Error introspection: special forms to avoid error propagation + # Error introspection: special forms to avoid error propagation IF formName == "error?" THEN val = eval!(COPY items[1], envId, pool); RETURN boolVal(isError?(val)); ELSE_IF formName == "error-message" THEN - val = eval!(items[1], envId, pool); - RETURN Value{ Str: getErrMsg(val) }; + val = eval!(COPY items[1], envId, pool); + RETURN Value{ Str: COPY getErrMsg(val) }; ELSE_IF formName == "error-kind" THEN - val = eval!(items[1], envId, pool); - RETURN Value{ Str: getErrKind(val) }; + val = eval!(COPY items[1], envId, pool); + RETURN Value{ Str: COPY getErrKind(val) }; ELSE_IF formName == "quote" THEN quoted = items[1]; - RETURN quoted; + RETURN COPY quoted; ELSE_IF formName == "raise" THEN msg = eval!(items[1], envId, pool); IF isError?(msg) THEN RETURN msg; END kind = eval!(items[2], envId, pool); IF isError?(kind) THEN RETURN kind; END - RETURN Value.Error{ errMsg: getStr(msg), errKind: getStr(kind) }; + RETURN Value.Error{ errMsg: COPY getStr(msg), errKind: COPY getStr(kind) }; ELSE_IF formName == "try" THEN - -- (try expr (catch e handler)) + # (try expr (catch e handler)) MUTABLE tryResult: Value = eval!(items[1], envId, pool); - MATCH tryResult START + PARTIAL MATCH tryResult START Value.Error AS e -> RETURN handleCatch!(items[2], e.errMsg, e.errKind, envId, pool);, - DEFAULT -> RETURN tryResult; + DEFAULT -> RETURN COPY tryResult; END - RETURN tryResult; + RETURN COPY tryResult; ELSE_IF formName == "def!" || formName == "define" THEN defName = getSymName(items[1]); val = eval!(items[2], envId, pool); IF isError?(val) THEN RETURN val; END - pool[envId]?.vars[defName] = val; - result = pool[envId]?.vars[defName] OR Value.Nil; - RETURN result; + IF pool[envId] AS env THEN + env.vars[defName] = COPY val; + result = env.vars[defName] OR Value.Nil; + RETURN COPY result; + END + RETURN Value.Nil; ELSE_IF formName == "set!" THEN setName = getSymName(items[1]); setVal = eval!(items[2], envId, pool); IF isError?(setVal) THEN RETURN setVal; END envSet!(envId, setName, setVal, pool); - RETURN setVal; + RETURN COPY setVal; ELSE_IF formName == "let*" || formName == "let" THEN - MATCH items[1] START + PARTIAL MATCH items[1] START Value.List AS binds -> letId: Id = pool.insert(Env{ vars: {} }); - pool[letId]?.vars["__p"] = Value{ EnvRef: envId }; + IF pool[letId] AS env THEN env.vars["__p"] = Value{ EnvRef: envId }; END IF binds.length() > 0 && isList?(binds[0]) THEN FOR bi IN (0_i64 ..< binds.length()) DO - MATCH binds[bi] START + PARTIAL MATCH binds[bi] START Value.List AS pair -> bName = getSymName(pair[0]); bVal = eval!(COPY pair[1], letId, pool); IF isError?(bVal) THEN RETURN bVal; END - pool[letId]?.vars[bName] = bVal;, + IF pool[letId] AS letEnv THEN letEnv.vars[bName] = COPY bVal; END, DEFAULT -> PASS; END END @@ -552,7 +559,7 @@ FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETU bName = getSymName(binds[bi]); bVal = eval!(COPY binds[bi + 1], letId, pool); IF isError?(bVal) THEN RETURN bVal; END - pool[letId]?.vars[bName] = bVal; + IF pool[letId] AS env THEN env.vars[bName] = COPY bVal; END bi += 2; END END @@ -562,7 +569,7 @@ FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETU RETURN Value.Nil; ELSE_IF formName == "fn*" || formName == "lambda" THEN - MATCH items[1] START + PARTIAL MATCH items[1] START Value.List AS pnames -> RETURN Value.Lambda{ params: COPY pnames, body: COPY items[2], envId: envId };, DEFAULT -> RETURN Value.Nil; @@ -595,17 +602,17 @@ FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETU IF isError?(argVal) THEN RETURN argVal; END evaled.append(argVal); END - f = evaled[0]; + f = COPY evaled[0]; IF isLambda?(f) THEN - MATCH f START + PARTIAL MATCH f START Value.Lambda AS lam -> callId: Id = pool.insert(Env{ vars: {} }); - pool[callId]?.vars["__p"] = Value{ EnvRef: lam.envId }; + IF pool[callId] AS env THEN env.vars["__p"] = Value{ EnvRef: lam.envId }; END FOR pi IN (0_i64 ..< lam.params.length()) DO pname = getSymName(lam.params[pi]); - pool[callId]?.vars[pname] = evaled[pi + 1]; + IF pool[callId] AS env THEN env.vars[pname] = COPY evaled[pi + 1]; END END - RETURN Value.Tco{ tcoAst: lam.body, tcoEnv: callId };, + RETURN Value.Tco{ tcoAst: COPY lam.body, tcoEnv: callId };, DEFAULT -> RETURN Value.Nil; END ELSE @@ -618,72 +625,72 @@ FN evalList!(items: Value[], envId: Id, MUTABLE pool: Env[50000]@pool) RETU END END --- runTest: tokenize + parse + eval +# runTest: tokenize + parse + eval -FN runTest!(input: String, envId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS Value @reentrant -> +FN runTest!(input: String, envId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS !Value EFFECTS REENTRANT -> tokenizeToEnv!(penv, input); penv["__rp"] = Value{ Number: 0.0 }; ast = readFormEnv!(penv); RETURN eval!(ast, envId, pool); END --- Setup: create root env with all native functions registered. --- Returns the root env Id. +# Setup: create root env with all native functions registered. +# Returns the root env Id. -FN setupEnv!(MUTABLE pool: Env[50000]@pool) RETURNS Id -> +FN setupEnv!(MUTABLE pool: Env[50000]@pool) RETURNS !Id -> rootId: Id = pool.insert(Env{ vars: {} }); - -- Arithmetic: 1-4 - pool[rootId]?.vars["+"] = Value{ NativeFn: 1 }; - pool[rootId]?.vars["-"] = Value{ NativeFn: 2 }; - pool[rootId]?.vars["*"] = Value{ NativeFn: 3 }; - pool[rootId]?.vars["/"] = Value{ NativeFn: 4 }; - -- Comparison: 5-9 - pool[rootId]?.vars["="] = Value{ NativeFn: 5 }; - pool[rootId]?.vars["<"] = Value{ NativeFn: 6 }; - pool[rootId]?.vars[">"] = Value{ NativeFn: 7 }; - pool[rootId]?.vars["<="] = Value{ NativeFn: 8 }; - pool[rootId]?.vars[">="] = Value{ NativeFn: 9 }; - -- List: 10-15 - pool[rootId]?.vars["list"] = Value{ NativeFn: 10 }; - pool[rootId]?.vars["list?"] = Value{ NativeFn: 11 }; - pool[rootId]?.vars["empty?"] = Value{ NativeFn: 12 }; - pool[rootId]?.vars["count"] = Value{ NativeFn: 13 }; - pool[rootId]?.vars["not"] = Value{ NativeFn: 14 }; - pool[rootId]?.vars["prn"] = Value{ NativeFn: 15 }; - -- Vector: 16-20 - pool[rootId]?.vars["vector"] = Value{ NativeFn: 16 }; - pool[rootId]?.vars["vector-ref"] = Value{ NativeFn: 17 }; - pool[rootId]?.vars["vector-set!"] = Value{ NativeFn: 18 }; - pool[rootId]?.vars["vector-length"] = Value{ NativeFn: 19 }; - pool[rootId]?.vars["vector?"] = Value{ NativeFn: 20 }; - -- Pair: 21-24 - pool[rootId]?.vars["cons"] = Value{ NativeFn: 21 }; - pool[rootId]?.vars["car"] = Value{ NativeFn: 22 }; - pool[rootId]?.vars["cdr"] = Value{ NativeFn: 23 }; - pool[rootId]?.vars["pair?"] = Value{ NativeFn: 24 }; - -- Symbol comparison: 25 - pool[rootId]?.vars["eq?"] = Value{ NativeFn: 25 }; - -- String: 26-33 - pool[rootId]?.vars["string-append"] = Value{ NativeFn: 26 }; - pool[rootId]?.vars["string-length"] = Value{ NativeFn: 27 }; - pool[rootId]?.vars["substring"] = Value{ NativeFn: 28 }; - pool[rootId]?.vars["string-ref"] = Value{ NativeFn: 29 }; - pool[rootId]?.vars["number->string"] = Value{ NativeFn: 30 }; - pool[rootId]?.vars["string->number"] = Value{ NativeFn: 31 }; - pool[rootId]?.vars["string?"] = Value{ NativeFn: 32 }; - pool[rootId]?.vars["display"] = Value{ NativeFn: 33 }; - -- error?, error-message, error-kind are special forms (not native fns) + # Arithmetic: 1-4 + IF pool[rootId] AS env THEN env.vars["+"] = Value{ NativeFn: 1 }; END + IF pool[rootId] AS env THEN env.vars["-"] = Value{ NativeFn: 2 }; END + IF pool[rootId] AS env THEN env.vars["*"] = Value{ NativeFn: 3 }; END + IF pool[rootId] AS env THEN env.vars["/"] = Value{ NativeFn: 4 }; END + # Comparison: 5-9 + IF pool[rootId] AS env THEN env.vars["="] = Value{ NativeFn: 5 }; END + IF pool[rootId] AS env THEN env.vars["<"] = Value{ NativeFn: 6 }; END + IF pool[rootId] AS env THEN env.vars[">"] = Value{ NativeFn: 7 }; END + IF pool[rootId] AS env THEN env.vars["<="] = Value{ NativeFn: 8 }; END + IF pool[rootId] AS env THEN env.vars[">="] = Value{ NativeFn: 9 }; END + # List: 10-15 + IF pool[rootId] AS env THEN env.vars["list"] = Value{ NativeFn: 10 }; END + IF pool[rootId] AS env THEN env.vars["list?"] = Value{ NativeFn: 11 }; END + IF pool[rootId] AS env THEN env.vars["empty?"] = Value{ NativeFn: 12 }; END + IF pool[rootId] AS env THEN env.vars["count"] = Value{ NativeFn: 13 }; END + IF pool[rootId] AS env THEN env.vars["not"] = Value{ NativeFn: 14 }; END + IF pool[rootId] AS env THEN env.vars["prn"] = Value{ NativeFn: 15 }; END + # Vector: 16-20 + IF pool[rootId] AS env THEN env.vars["vector"] = Value{ NativeFn: 16 }; END + IF pool[rootId] AS env THEN env.vars["vector-ref"] = Value{ NativeFn: 17 }; END + IF pool[rootId] AS env THEN env.vars["vector-set!"] = Value{ NativeFn: 18 }; END + IF pool[rootId] AS env THEN env.vars["vector-length"] = Value{ NativeFn: 19 }; END + IF pool[rootId] AS env THEN env.vars["vector?"] = Value{ NativeFn: 20 }; END + # Pair: 21-24 + IF pool[rootId] AS env THEN env.vars["cons"] = Value{ NativeFn: 21 }; END + IF pool[rootId] AS env THEN env.vars["car"] = Value{ NativeFn: 22 }; END + IF pool[rootId] AS env THEN env.vars["cdr"] = Value{ NativeFn: 23 }; END + IF pool[rootId] AS env THEN env.vars["pair?"] = Value{ NativeFn: 24 }; END + # Symbol comparison: 25 + IF pool[rootId] AS env THEN env.vars["eq?"] = Value{ NativeFn: 25 }; END + # String: 26-33 + IF pool[rootId] AS env THEN env.vars["string-append"] = Value{ NativeFn: 26 }; END + IF pool[rootId] AS env THEN env.vars["string-length"] = Value{ NativeFn: 27 }; END + IF pool[rootId] AS env THEN env.vars["substring"] = Value{ NativeFn: 28 }; END + IF pool[rootId] AS env THEN env.vars["string-ref"] = Value{ NativeFn: 29 }; END + IF pool[rootId] AS env THEN env.vars["number->string"] = Value{ NativeFn: 30 }; END + IF pool[rootId] AS env THEN env.vars["string->number"] = Value{ NativeFn: 31 }; END + IF pool[rootId] AS env THEN env.vars["string?"] = Value{ NativeFn: 32 }; END + IF pool[rootId] AS env THEN env.vars["display"] = Value{ NativeFn: 33 }; END + # error?, error-message, error-kind are special forms (not native fns) RETURN rootId; END --- Test helpers: create fresh interpreter, run input, return result string. +# Test helpers: create fresh interpreter, run input, return result string. -FN evalIn!(input: String, readable: Bool, rootId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS String -> +FN evalIn!(input: String, readable: Bool, rootId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS !String -> result = runTest!(input, rootId, pool, penv); RETURN prStr(result, readable); END -FN evalSeqIn!(inputs: String[], readable: Bool, rootId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS String -> +FN evalSeqIn!(inputs: String[], readable: Bool, rootId: Id, MUTABLE pool: Env[50000]@pool, MUTABLE penv: HashMap) RETURNS !String -> MUTABLE result: Value = Value.Nil; FOR i IN (0_i64 ..< inputs.length()) DO result = runTest!(inputs[i], rootId, pool, penv); @@ -691,41 +698,41 @@ FN evalSeqIn!(inputs: String[], readable: Bool, rootId: Id, MUTABLE pool: E RETURN prStr(result, readable); END -FN main() RETURNS Void -> +FN main() RETURNS !Void -> MUTABLE pool: Env[50000]@pool = []; MUTABLE penv: HashMap = {}; rootId = setupEnv!(pool); - -- Arithmetic + # Arithmetic ASSERT evalIn!("(+ 1 2)", TRUE, rootId, pool, penv) == "3", "(+ 1 2)"; ASSERT evalIn!("(- 5 3)", TRUE, rootId, pool, penv) =="2", "(- 5 3)"; ASSERT evalIn!("(* 3 4)", TRUE, rootId, pool, penv) =="12", "(* 3 4)"; ASSERT evalIn!("(/ 10 2)", TRUE, rootId, pool, penv) =="5", "(/ 10 2)"; ASSERT evalIn!("(+ (* 2 3) (- 10 4))", TRUE, rootId, pool, penv) =="12", "nested arithmetic"; - -- Variables + # Variables ASSERT evalIn!("(def! a 10)", TRUE, rootId, pool, penv) =="10", "def! returns value"; ASSERT evalSeqIn!(["(def! a 10)", "a"], TRUE, rootId, pool, penv) =="10", "def! is retrievable"; - -- Closures + # Closures ASSERT evalIn!("(def! f (fn* (x) (+ x 1)))", TRUE, rootId, pool, penv) =="#", "fn* creates function"; ASSERT evalSeqIn!(["(def! a 10)", "(def! f (fn* (x) (+ x a)))", "(f 5)"], TRUE, rootId, pool, penv) =="15", "closure captures outer"; ASSERT evalSeqIn!(["(def! add3 (fn* (a b c) (+ a (+ b c))))", "(add3 1 2 3)"], TRUE, rootId, pool, penv) =="6", "multi-param fn"; - -- Let bindings + # Let bindings ASSERT evalIn!("(let* (b 2 c 3) (+ b c))", TRUE, rootId, pool, penv) =="5", "let* scoped bindings"; ASSERT evalIn!("(let* (a 1 b (+ a 1)) b)", TRUE, rootId, pool, penv) =="2", "let* references earlier"; - -- Conditionals + # Conditionals ASSERT evalIn!("(if true 7 8)", TRUE, rootId, pool, penv) =="7", "if true"; ASSERT evalIn!("(if false 7 8)", TRUE, rootId, pool, penv) =="8", "if false"; ASSERT evalIn!("(if nil 7 8)", TRUE, rootId, pool, penv) =="8", "if nil"; ASSERT evalIn!("(if false 7)", TRUE, rootId, pool, penv) =="nil", "if without else"; - -- Sequential evaluation + # Sequential evaluation ASSERT evalIn!("(do (def! d 6) 7 (+ d 8))", TRUE, rootId, pool, penv) =="14", "do returns last"; - -- Comparison + # Comparison ASSERT evalIn!("(= 1 1)", TRUE, rootId, pool, penv) =="true", "= equal"; ASSERT evalIn!("(= 1 2)", TRUE, rootId, pool, penv) =="false", "= unequal"; ASSERT evalIn!("(> 2 1)", TRUE, rootId, pool, penv) =="true", "> comparison"; @@ -733,33 +740,33 @@ FN main() RETURNS Void -> ASSERT evalIn!("(<= 2 2)", TRUE, rootId, pool, penv) =="true", "<= equal"; ASSERT evalIn!("(>= 3 2)", TRUE, rootId, pool, penv) =="true", ">= greater"; - -- List operations + # List operations ASSERT evalIn!("(list 1 2 3)", FALSE, rootId, pool, penv) == "(1 2 3)", "list creates list"; ASSERT evalIn!("(count (list 1 2 3))", TRUE, rootId, pool, penv) =="3", "count"; ASSERT evalIn!("(empty? (list))", TRUE, rootId, pool, penv) =="true", "empty?"; ASSERT evalIn!("(not false)", TRUE, rootId, pool, penv) =="true", "not"; ASSERT evalIn!("(list? (list 1 2))", TRUE, rootId, pool, penv) =="true", "list?"; - -- Recursion + # Recursion ASSERT evalSeqIn!(["(def! sumdown (fn* (n) (if (> n 0) (+ n (sumdown (- n 1))) 0)))", "(sumdown 6)"], TRUE, rootId, pool, penv) =="21", "sumdown"; ASSERT evalSeqIn!(["(def! fib (fn* (n) (if (<= n 1) 1 (+ (fib (- n 1)) (fib (- n 2))))))", "(fib 4)"], TRUE, rootId, pool, penv) =="5", "fibonacci"; - -- Scheme syntax: define, lambda, let, begin + # Scheme syntax: define, lambda, let, begin ASSERT evalIn!("(define x 42)", TRUE, rootId, pool, penv) =="42", "define"; ASSERT evalSeqIn!(["(define x 42)", "x"], TRUE, rootId, pool, penv) =="42", "define retrievable"; ASSERT evalSeqIn!(["(define inc (lambda (n) (+ n 1)))", "(inc 5)"], TRUE, rootId, pool, penv) =="6", "lambda"; ASSERT evalIn!("(let ((a 3) (b 4)) (+ a b))", TRUE, rootId, pool, penv) =="7", "let"; ASSERT evalIn!("(begin 1 2 3)", TRUE, rootId, pool, penv) =="3", "begin"; - -- set!: mutable reassignment + # set!: mutable reassignment ASSERT evalSeqIn!(["(define x 1)", "(set! x 2)", "x"], TRUE, rootId, pool, penv) =="2", "set! reassigns"; ASSERT evalSeqIn!(["(define x 10)", "(define inc! (lambda () (set! x (+ x 1))))", "(inc!)", "x"], TRUE, rootId, pool, penv) =="11", "set! in closure"; - -- TCO: deep recursion with 50000-slot pool + # TCO: deep recursion with 50000-slot pool ASSERT evalSeqIn!(["(define countdown (lambda (n) (if (= n 0) 0 (countdown (- n 1)))))", "(countdown 5000)"], TRUE, rootId, pool, penv) =="0", "TCO countdown 5000"; ASSERT evalSeqIn!(["(define loop (lambda (n acc) (if (= n 0) acc (loop (- n 1) (+ acc n)))))", "(loop 2000 0)"], TRUE, rootId, pool, penv) =="2001000", "TCO accumulator 2000"; - -- Vector operations (STRUCT lowering) + # Vector operations (STRUCT lowering) ASSERT evalIn!("(vector 10 20 30)", TRUE, rootId, pool, penv) =="#(10 20 30)", "vector create"; ASSERT evalIn!("(vector-ref (vector 10 20 30) 0)", TRUE, rootId, pool, penv) =="10", "vector-ref 0"; ASSERT evalIn!("(vector-ref (vector 10 20 30) 2)", TRUE, rootId, pool, penv) =="30", "vector-ref 2"; @@ -767,24 +774,24 @@ FN main() RETURNS Void -> ASSERT evalIn!("(vector? (vector 1 2))", TRUE, rootId, pool, penv) =="true", "vector? true"; ASSERT evalIn!("(vector? (list 1 2))", TRUE, rootId, pool, penv) =="false", "vector? false on list"; - -- Pair operations (UNION lowering) + # Pair operations (UNION lowering) ASSERT evalIn!("(cons 1 2)", TRUE, rootId, pool, penv) =="(1 . 2)", "cons"; ASSERT evalIn!("(car (cons 1 2))", TRUE, rootId, pool, penv) =="1", "car"; ASSERT evalIn!("(cdr (cons 1 2))", TRUE, rootId, pool, penv) =="2", "cdr"; ASSERT evalIn!("(pair? (cons 1 2))", TRUE, rootId, pool, penv) =="true", "pair? true"; ASSERT evalIn!("(pair? (list 1 2))", TRUE, rootId, pool, penv) =="false", "pair? false on list"; - -- Tagged pairs for UNION lowering: (cons 'Tag payload) + # Tagged pairs for UNION lowering: (cons 'Tag payload) ASSERT evalIn!("(car (cons (quote Ok) 42))", TRUE, rootId, pool, penv) =="Ok", "tagged pair car"; ASSERT evalIn!("(cdr (cons (quote Ok) 42))", TRUE, rootId, pool, penv) =="42", "tagged pair cdr"; ASSERT evalIn!("(eq? (car (cons (quote Err) 0)) (quote Err))", TRUE, rootId, pool, penv) =="true", "tag eq?"; ASSERT evalIn!("(eq? (car (cons (quote Ok) 0)) (quote Err))", TRUE, rootId, pool, penv) =="false", "tag neq?"; - -- Quote returns unevaluated + # Quote returns unevaluated ASSERT evalIn!("(quote hello)", TRUE, rootId, pool, penv) =="hello", "quote symbol"; ASSERT evalIn!("(quote (1 2 3))", FALSE, rootId, pool, penv) == "(1 2 3)", "quote list"; - -- String operations + # String operations ASSERT evalIn!("(string-append \"hello\" \" \" \"world\")", TRUE, rootId, pool, penv) =="\"hello world\"", "string-append"; ASSERT evalIn!("(string-length \"abc\")", TRUE, rootId, pool, penv) =="3", "string-length"; ASSERT evalIn!("(substring \"abcdef\" 1 4)", TRUE, rootId, pool, penv) =="\"bcd\"", "substring"; @@ -795,19 +802,19 @@ FN main() RETURNS Void -> ASSERT evalIn!("(string? \"hi\")", TRUE, rootId, pool, penv) =="true", "string? true"; ASSERT evalIn!("(string? 42)", TRUE, rootId, pool, penv) =="false", "string? false"; - -- Error handling: raise + try/catch + # Error handling: raise + try/catch ASSERT evalIn!("(try (raise \"boom\" \"System\") (catch e (error-message e)))", FALSE, rootId, pool, penv) =="boom", "try/catch message"; ASSERT evalIn!("(try (raise \"x\" \"Input\") (catch e (error-kind e)))", FALSE, rootId, pool, penv) =="Input", "try/catch kind"; ASSERT evalIn!("(try (+ 1 2) (catch e \"bad\"))", TRUE, rootId, pool, penv) =="3", "try no error"; ASSERT evalIn!("(try (+ 1 (raise \"x\" \"E\")) (catch e \"caught\"))", FALSE, rootId, pool, penv) =="caught", "error propagates through +"; ASSERT evalIn!("(error? (raise \"x\" \"E\"))", TRUE, rootId, pool, penv) =="true", "error? on raise"; - -- Error propagation through define, begin, if + # Error propagation through define, begin, if ASSERT evalSeqIn!(["(define safe-div (lambda (a b) (if (= b 0) (raise \"div0\" \"Input\") (/ a b))))", "(try (safe-div 10 0) (catch e \"nope\"))"], FALSE, rootId, pool, penv) =="nope", "error from lambda"; ASSERT evalIn!("(try (begin 1 (raise \"mid\" \"E\") 3) (catch e \"stopped\"))", FALSE, rootId, pool, penv) =="stopped", "error in begin"; ASSERT evalIn!("(try (if (raise \"cond\" \"E\") 1 2) (catch e \"cond-err\"))", FALSE, rootId, pool, penv) =="cond-err", "error in if cond"; - -- Pool capacity: verify 50k pool handles deep programs + # Pool capacity: verify 50k pool handles deep programs ASSERT evalSeqIn!(["(define deep (lambda (n) (if (= n 0) 0 (+ 1 (deep (- n 1))))))", "(deep 100)"], TRUE, rootId, pool, penv) =="100", "deep non-TCO 100"; print("All 75 interpreter tests PASSED!"); From 156bc666f001ba927afa7c2f5152c70001866594 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 12:17:53 +0000 Subject: [PATCH 08/21] fix(annotator): generalize borrow-escape rejection across all binding forms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compiler had two specialized rules — RETURN-of-WITH-alias and IF-AS non_escaping propagation — that left a memory-safety hole: borrows extracted via MATCH-AS, WHILE-AS, FOR-EACH, struct destructure, or pipeline AS could be returned, container-stored, or BG-captured with no diagnostic, producing UAF. Discovered while debugging examples/mal/interpreter.cht's "tagged pair car" test: prStr's `Value.Symbol AS s -> RETURN s;` returned a slice into a caller-owned Value whose cleanup-defer fired before the function actually returned. Caller dereferenced freed memory. Universal rule replacing the specialized ones: Any binding produced by a borrow-extracting pattern (WITH-AS, IF-AS, MATCH-AS, WHILE-AS, FOR-EACH, struct destructure in IF/MATCH, pipeline `AS $name`) is marked non_escaping AND borrowed_alias when the payload is non-Copy. visit_ReturnNode rejects RETURN of any chain rooted in such a binding (identifier, GetField, GetIndex) regardless of WITH context. The existing has_non_escaping_capture check rejects fiber capture via the same flag. This is mostly subtractive: the @with_block_depth>0 gate is lifted, the IF-AS conditional propagation is replaced with unconditional, and a single helper (mark_borrow_binding_non_escaping!) becomes the single source of truth. New diagnostic codes RETURN_OF_BORROW, RETURN_FIELD_OF_BORROW, RETURN_INDEX_OF_BORROW for non-WITH cases; existing WITH codes preserved for in-WITH context. Tests / specs updated to use `RETURN COPY s` (the safe form) where they used the unsafe pattern: 7 transpile-tests (119, 160, 162, 174, 175, 176, 179) plus spec/affine_ownership_spec.rb and spec/cleanup_plan_spec.rb. The test intent (cleanup-plan classification) is preserved. Regression artifacts: - transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht: minimal 25-line runtime repro; lives outside the auto-glob. - spec/transpiler_spec.rb: 7 new specs covering identifier, field, index, BG-capture, FOR-EACH, MATCH-destructure shapes, plus a `RETURN COPY` positive case. - spec/schemas_spec.rb: 7 unit tests for as_struct_schema and as_resource_schema fallbacks (closes 3-line coverage gap from the existing PR scope). - docs/postmortems/383_uaf_match_as_borrow_returned.md: full root-cause writeup, table of every borrow-introducing site, LEND-roadmap alignment. examples/mal/interpreter.cht updated with COPY at every flagged site so it transpiles cleanly. It still does not run end-to-end: prStr mixes frame-allocated string-concat returns with heap-allocated COPY returns, hitting an INV-1 (single-allocator-per-binding) violation that escape analysis currently does not unify across return branches. That is a separate compiler bug filed for follow-up. Verified: 4722 examples / 0 failures / 3 pending; Sorbet clean; all transpile-tests/*.cht produce valid Zig; 0 uncovered changed lines in src/. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../383_uaf_match_as_borrow_returned.md | 306 ++++++++++++++++++ examples/mal/interpreter.cht | 22 +- spec/affine_ownership_spec.rb | 2 +- spec/cleanup_plan_spec.rb | 2 +- spec/generics_spec.rb | 4 +- spec/mir_lowering_spec.rb | 8 +- spec/schemas_spec.rb | 73 +++++ spec/transpiler_spec.rb | 159 +++++++++ src/annotator.rb | 129 ++++++-- src/ast/diagnostic_registry.rb | 18 ++ .../119_hashmap_string_readback.cht | 2 +- transpile-tests/160_temp_value_no_cleanup.cht | 6 +- .../162_return_borrow_from_hpt.cht | 6 +- .../174_union_match_struct_fields.cht | 4 +- transpile-tests/175_tco_try_catch.cht | 10 +- transpile-tests/176_hashmap_union_dupe.cht | 2 +- .../179_hashmap_structlit_no_double_dupe.cht | 4 +- .../383_uaf_match_as_borrow_returned.cht | 52 +++ 18 files changed, 758 insertions(+), 51 deletions(-) create mode 100644 docs/postmortems/383_uaf_match_as_borrow_returned.md create mode 100644 spec/schemas_spec.rb create mode 100644 transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht diff --git a/docs/postmortems/383_uaf_match_as_borrow_returned.md b/docs/postmortems/383_uaf_match_as_borrow_returned.md new file mode 100644 index 000000000..a9513cb8f --- /dev/null +++ b/docs/postmortems/383_uaf_match_as_borrow_returned.md @@ -0,0 +1,306 @@ +# Postmortem: UAF returning a MATCH-AS borrow (issue 383) + +**Severity:** High — memory safety hole. CLEAR is supposed to make UAF +impossible at compile time. This bug emits unsafe Zig with no diagnostic. + +**Discovered:** while diagnosing why `examples/mal/interpreter.cht` segfaults +on the assertion `(car (cons (quote Ok) 42)) == "Ok"`. Out-of-band repro: +`transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht` (segfaults at +the first `print` of the returned string). + +## Symptom + +``` +1/1 ._clear_tmp_383...test...Segmentation fault at address 0x... +in concat (std.mem.zig) +in clearMain std.debug.print(... .{ "got [", s, "]" } ) +``` + +The returned `String` either prints as garbage bytes (`got [ÿÿ]`) or +segfaults on the next read, depending on what overwrites the freed slot. + +## Minimal repro (25 lines) + +```clear +UNION Value { + Number: Float64, + Symbol: String +} + +FN stringOf(v: Value) RETURNS String -> + PARTIAL MATCH v START + Value.Symbol AS s -> RETURN s;, + DEFAULT -> RETURN ""; + END + RETURN ""; +END + +FN getName!() RETURNS !String -> + result: Value = Value{ Symbol: COPY "Ok" }; + out = stringOf(result); + RETURN out; +END + +FN main() RETURNS Void -> + s = getName!(); + print("got [" + s + "]"); + ASSERT s == "Ok", "borrowed string returned across cleanup boundary"; +END +``` + +## Root cause + +`MATCH v AS s` binds `s` to a *borrow* of the variant payload. When the +payload is `String` (or any non-Copy type), `RETURN s` hands back a slice +that aliases caller-owned heap memory. + +The lowering of `getName!` in Zig: + +```zig +const result = ...; // owned heap Value +defer CheatLib.cleanup(Value, rt.heapAlloc(), &result); // queued +const out = try stringOf(rt, result); // out borrows result +return out; // captured first... + // ...then defer frees + // result -> dangling +``` + +`out`'s slice header (ptr+len) is copied into the return register before +defers fire, but the bytes it points to are freed by the cleanup defer +**before the function actually returns**. The caller dereferences a freed +pointer. + +## How it bypassed the static checker + +The language already has the right concept: `SymbolEntry#non_escaping`. It +is set on WITH-AS aliases by `src/annotator-helpers/capabilities.rb` and +checked at return sites by `src/annotator.rb:visit_ReturnNode` (lines +2244-2249). That check rejects `RETURN ident`, `RETURN ident.field`, and +`RETURN ident[i]` whenever `ident.symbol.non_escaping` is true — which is +exactly the shape of this bug. + +There are **two gaps** that prevent the check from firing: + +### Gap 1: MATCH-AS bindings aren't marked `non_escaping` + +`src/annotator.rb:1733-1737` correctly classifies MATCH-AS bindings as +borrowed in the OwnershipGraph: + +```ruby +# MATCH AS: borrow view into the source union's payload. +# MATCH TAKES: owned extraction - source is consumed. +unless node.takes + @og[c[:binding]]&.kind = :borrowed +end +``` + +But this never propagates to the `SymbolEntry`'s `non_escaping` flag — +the OG-level kind and the symbol-level `non_escaping` are independent +flags, and only the latter is consulted at return sites. + +### Gap 2: the return-of-borrow check is gated to WITH blocks + +`src/annotator.rb:2242`: + +```ruby +if (@with_block_depth || 0) > 0 + val = node.value + if val.is_a?(AST::Identifier) && val.symbol&.non_escaping + error!(node, :RETURN_FROM_WITH_SCOPED, ...) + elsif val.is_a?(AST::GetField) && val.target.respond_to?(:symbol) && val.target.symbol&.non_escaping + error!(node, :RETURN_FIELD_FROM_WITH_SCOPED, ...) + elsif val.is_a?(AST::GetIndex) && val.target.respond_to?(:symbol) && val.target.symbol&.non_escaping + error!(node, :RETURN_INDEX_FROM_WITH_SCOPED, ...) + end +end +``` + +The check is structurally correct. The `@with_block_depth > 0` gate makes +it fire **only** for WITH aliases. Outside any WITH block (e.g., in a +plain `MATCH` body), the gate is false and the rejection never runs — +even if `non_escaping` were set. + +### Why the MIR checker doesn't catch it either + +The MIR checker's seven invariants (CLAUDE.md, "Role 2") all assume the +lowering placed correct cleanup nodes for owned values. They do **not** +re-derive borrow vs. own at use sites. Per the design: + +> NEVER add to the checker: +> - "Consuming position" analysis — the lowering must emit `ErrCleanup` +> structurally. + +The borrow/own decision is the annotator's responsibility. The annotator +silently tagged this as a regular owned return (no cleanup-suppression +needed because it's not moving anything), the lowering emitted a `Cleanup` +on `result` and a borrow-pass-through `out = stringOf(result)`, and the +checker had nothing to flag. + +## Fix (landed in this PR) + +Generalized the rule: **any RETURN whose value is rooted at a non_escaping +binding is rejected, regardless of which construct introduced the borrow.** +This subsumes the WITH-only and IF-AS-only special cases. + +### `src/annotator.rb` — propagate `non_escaping` to all borrow-extracting +patterns when the payload is non-Copy + +A new helper `mark_borrow_binding_non_escaping!(entry, payload_type)` is +called from every binding site that produces a borrow: + +- `MATCH v AS s` (line ~1742): always mark non_escaping for non-Copy + payloads. Previously, the OG was set to `:borrowed` but the SymbolEntry + flag was never set. +- `IF expr AS x` (line ~1450): unconditionally mark non_escaping for + non-Copy unwrapped types. Previously only propagated when the *source* + was already non_escaping — missed locally-owned sources whose lifetime + ends with the function. +- `WHILE expr AS s` (line ~2095): same rule. + +### `src/annotator.rb:visit_ReturnNode` — lift the WITH-only gate + +The check that rejects `RETURN ident` / `RETURN ident.field` / +`RETURN ident[i]` when the root binding is non_escaping was previously +gated on `(@with_block_depth || 0) > 0`. Lifted that gate; the check now +fires universally. Inside WITH it still uses the WITH-specific error +codes (`RETURN_FROM_WITH_SCOPED`, ...) for better diagnostics; outside +WITH it uses the new generic codes (`RETURN_OF_BORROW`, +`RETURN_FIELD_OF_BORROW`, `RETURN_INDEX_OF_BORROW`) added to +`src/ast/diagnostic_registry.rb`. + +The chain root is now found by recursively walking GetField/GetIndex via +the existing `ifbind_source_root` helper, so `pp.p.left` (a two-level +GetField chain) is correctly rejected. + +### Why the universal rule is the right shape + +The OwnershipGraph already classifies bindings as `:owned` vs `:borrowed`. +Per-construct rejections were the wrong abstraction — they had to be +re-derived for each new binding form. The universal rule is "the OG knows +what's borrowed; ask it" — and the existing visit_ReturnNode predicate +already handles the relevant chain shapes. The fix is therefore tiny and +mostly subtractive (removing the `@with_block_depth > 0` gate, removing +the conditional in IF-AS that limited propagation to non_escaping +sources). + +### Two flags, two checks (kept distinct for now) + +- `non_escaping` → blocks RETURN of the binding (`visit_ReturnNode`) + and storing it in containers (`ensure_owned_value!`). +- `borrowed_alias` → blocks fiber/BG/lambda/stream capture + (`capabilities.rb` `has_non_escaping_capture`). Without + this flag a captured borrow becomes a fiber-stack UAF + the moment the BG outlives the binding's source. + +`mark_borrow_binding_non_escaping!` sets BOTH flags. The capture check at +`capabilities.rb:1099` reads `borrowed_alias` and is now reached by all +the same patterns (MATCH-AS / IF-AS / WHILE-AS / WITH-AS) that the return +check is reached by. + +### How LEND will slot in (TODO.md, planned) + +`LEND` is the future opt-in escape hatch that lets users explicitly +attach a lifetime to a function or BG boundary: + +```clear +IF maybe AS s THEN + t = BG @lent(s) { print(s); }; + NEXT t; # compiler-verified: BG joins before s's source ends +END +``` + +Today, capturing `s` is rejected (`borrowed_alias` set). When LEND lands, +the compiler will accept the capture only when (a) it is annotated `@lent` +and (b) the capture site is followed by a `NEXT` / join that proves the +BG terminates before the lender's scope. The default *rejection* is what +makes that opt-in safe — without the universal rule, LEND would have to +police every construct individually. + +### Other patterns affected + +The generalized rule also fires for: + +- `pp.p.left` returned from a MATCH-AS arm where `p` carries a struct + (chain rooted at non_escaping `pp`) — caught by the GetField walker. +- `IF map[k] AS v -> RETURN v` for non-Copy `v` — caught because IF-AS + bindings are now unconditionally non_escaping for non-Copy payloads. +- `WHILE iter AS x -> RETURN x` — analogous. + +### Tests / specs that needed updating + +The new rule caught 7 transpile-tests and 2 specs that used the exact +unsafe pattern (extracting a String from a HashMap/union via MATCH-AS +and returning the borrow). These were tests of cleanup-plan behavior +that happened to use UAF-shaped code; they were updated to use +`RETURN COPY s` (the safe form) which preserves the test intent. + +Updated tests: 119, 160, 162, 174, 175, 176, 179 (all in +`transpile-tests/`). +Updated specs: `spec/affine_ownership_spec.rb`, `spec/cleanup_plan_spec.rb`. + +### Borrow-introducing sites covered (the universal sweep) + +| Site | Marked | Notes | +|---|---|---| +| `WITH ... AS alias` | ✓ | Pre-existing | +| `IF expr AS x` | ✓ | Generalized — was conditional, now unconditional for non-Copy | +| `MATCH v AS s` | ✓ | New — same payload-borrow rule | +| `WHILE expr AS s` | ✓ | New | +| `FOR x IN coll` | ✓ | New — loop var is a borrow into each element | +| `MATCH v { Variant{a, b} -> ... }` | ✓ | New — each destructured field is a borrow | +| `IF expr AS { a, b }` | ✓ | New — same shape as MATCH destructure | +| `coll AS $name` (pipeline) | ✓ | New — `$name` aliases each element / the source | +| `WITH RESTRICT/BORROWED` | ✓ | Pre-existing (also sets borrowed_alias) | + +All ten share the same helper `mark_borrow_binding_non_escaping!` which sets +both `non_escaping` (rejects RETURN / container-store) and `borrowed_alias` +(rejects BG / lambda / stream capture). One source of truth. + +### Sites still NOT covered (deliberate) + +- Function parameter returns: `RETURN param` for non-Copy parameter + types currently proceeds via `was_moved` (the call site transfers + ownership). This is sound when the call site treats the return as a + move. Returning `param.field` is more nuanced and would need its own + audit. +- `MATCH TAKES x { Foo{a, b} -> ... }`: the destructured fields are + owned (the source is consumed), so they are NOT marked non_escaping. + The check is gated by `unless node.takes`. +- `for i in 0..10` (`ForRange`): loop var is always `Int64` (Copy), + so the helper short-circuits. + +## Regression guards + +- `transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht` — runtime + repro retained for posterity. Lives outside the auto-discovered + `transpile-tests/*.cht` glob; rerun manually after future changes to + the borrow-escape logic. +- `spec/transpiler_spec.rb`'s `MATCH-AS borrow escape (memory safety)` + block: three regression tests asserting (a) the rejection fires, (b) + `RETURN COPY` is the documented escape hatch, (c) field chains rooted + in MATCH-AS are also rejected. + +## Remaining gap (out of scope here) + +`examples/mal/interpreter.cht` now compile-errors at every unsafe site, +but adding `COPY` to fix them surfaces a separate, deeper bug: `prStr` +mixes frame-allocated string-concat returns with heap-allocated COPY +returns, producing an allocator mismatch at the call site. The MIR's +single-allocator-per-binding invariant (INV-1) is supposed to make this +impossible — the underlying issue is that escape analysis does not +unify the return allocator across branches when one branch already +heap-allocates. Filed as a follow-up (task #8). + +## Other patterns to audit + +Same gap likely exists for: + +- `IF x AS y` (already handled — see `src/annotator.rb:1403-1451` which + propagates non_escaping for IF-AS sources; this is the model the MATCH + fix should follow). +- `result = p.field` chained off a MATCH-AS binding `p` where `p`'s + variant carries `@indirect` or slice fields: returning `result` is also + unsafe and currently slips by for the same reason. +- Method returns shaped like `RETURN COPY items[i]` where the slice + itself is borrowed from a parameter — should be safe (COPY breaks the + borrow), but worth a targeted regression test once Gap 2 is closed. diff --git a/examples/mal/interpreter.cht b/examples/mal/interpreter.cht index d3fc0d327..f1dfe9b12 100644 --- a/examples/mal/interpreter.cht +++ b/examples/mal/interpreter.cht @@ -35,8 +35,8 @@ FN isTruthy?(v: Value) RETURNS Bool -> RETURN TRUE; END -FN getSymName(v: Value) RETURNS String -> - PARTIAL MATCH v START Value.Symbol AS s -> RETURN s;, DEFAULT -> RETURN ""; END +FN getSymName(v: Value) RETURNS !String -> + PARTIAL MATCH v START Value.Symbol AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; END @@ -45,8 +45,8 @@ FN getNum(v: Value) RETURNS Float64 -> RETURN 0.0; END -FN getStr(v: Value) RETURNS String -> - PARTIAL MATCH v START Value.Str AS s -> RETURN s;, DEFAULT -> RETURN ""; END +FN getStr(v: Value) RETURNS !String -> + PARTIAL MATCH v START Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; END @@ -122,8 +122,8 @@ FN prStr(v: Value, readably: Bool) RETURNS !String EFFECTS REENTRANT -> Value.Number AS n -> IF n == floor(n) THEN RETURN toInt(n).toString(); END RETURN toInt(n).toString();, - Value.Str AS s -> IF readably THEN RETURN "\"" + s + "\""; END RETURN s;, - Value.Symbol AS s -> RETURN s;, + Value.Str AS s -> IF readably THEN RETURN "\"" + s + "\""; END RETURN COPY s;, + Value.Symbol AS s -> RETURN COPY s;, Value.List AS items -> MUTABLE out = "("; FOR li IN (0_i64 ..< items.length()) DO @@ -429,13 +429,13 @@ FN isError?(v: Value) RETURNS Bool -> RETURN FALSE; END -FN getErrMsg(v: Value) RETURNS String -> - PARTIAL MATCH v START Value.Error AS e -> RETURN e.errMsg;, DEFAULT -> RETURN ""; END +FN getErrMsg(v: Value) RETURNS !String -> + PARTIAL MATCH v START Value.Error AS e -> RETURN COPY e.errMsg;, DEFAULT -> RETURN ""; END RETURN ""; END -FN getErrKind(v: Value) RETURNS String -> - PARTIAL MATCH v START Value.Error AS e -> RETURN e.errKind;, DEFAULT -> RETURN ""; END +FN getErrKind(v: Value) RETURNS !String -> + PARTIAL MATCH v START Value.Error AS e -> RETURN COPY e.errKind;, DEFAULT -> RETURN ""; END RETURN ""; END @@ -471,7 +471,7 @@ FN eval!(astIn: Value, envId: Id, MUTABLE pool: Env[50000]@pool) RETURNS !V result = evalList!(listItems, curEnv, pool); PARTIAL MATCH result START Value.Tco AS tco -> - ast = tco.tcoAst; + ast = COPY tco.tcoAst; curEnv = tco.tcoEnv;, DEFAULT -> RETURN result; END, diff --git a/spec/affine_ownership_spec.rb b/spec/affine_ownership_spec.rb index 8d12a4889..ee4de2943 100644 --- a/spec/affine_ownership_spec.rb +++ b/spec/affine_ownership_spec.rb @@ -772,7 +772,7 @@ def get_last_type(source) FN test!(MUTABLE map: HashMap) RETURNS !String -> val = map["t0"] OR Value.Nil; PARTIAL MATCH val START - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; diff --git a/spec/cleanup_plan_spec.rb b/spec/cleanup_plan_spec.rb index da1de304e..8596a78ca 100644 --- a/spec/cleanup_plan_spec.rb +++ b/spec/cleanup_plan_spec.rb @@ -54,7 +54,7 @@ def cleanup_for(src, fn_name) FN test!(MUTABLE map: HashMap) RETURNS !String -> val = map["t0"] OR Value.Nil; PARTIAL MATCH val START - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; diff --git a/spec/generics_spec.rb b/spec/generics_spec.rb index 99c3c876c..afcb4d38c 100644 --- a/spec/generics_spec.rb +++ b/spec/generics_spec.rb @@ -728,7 +728,9 @@ def fn_err_src(fn_code, call_code = "PASS") out = ZigTranspiler.new.transpile(src) expect(out).to include("Cache(CheatLib.Arc(CheatLib.Locked(Box)))") expect(out).to include("get(CheatLib.Arc(CheatLib.Locked(Box)), c)") - expect(out).to include("set(CheatLib.Arc(CheatLib.Locked(Box)), c, got)") + # `set!(MUTABLE c, ...)` is a mutable value param: the caller + # passes `&c` and the callee receives `*Cache(...)`. + expect(out).to include("set(CheatLib.Arc(CheatLib.Locked(Box)), &c, got)") expect(out).to include("CheatLib.arcRetain(CheatLib.Locked(Box), b)") end diff --git a/spec/mir_lowering_spec.rb b/spec/mir_lowering_spec.rb index 530379a08..789796a19 100644 --- a/spec/mir_lowering_spec.rb +++ b/spec/mir_lowering_spec.rb @@ -1270,8 +1270,12 @@ def make_fn(name, params: [], return_type: :Void, body: [], visibility: nil, fn = make_fn("inc", params: params, body: [body_stmt]) result = lowering.lower(fn) zig = emit(result) - expect(zig).to include("_m_count: i64") - expect(zig).to include("var count = _m_count;") + # Mutable scalar params take a pointer so callee mutations + # propagate to the caller. The prologue shadows `_m_count.*` with + # a local `var count`; a defer writes it back on every exit. + expect(zig).to include("_m_count: *i64") + expect(zig).to include("var count = _m_count.*;") + expect(zig).to include("_m_count.* = count") end end diff --git a/spec/schemas_spec.rb b/spec/schemas_spec.rb new file mode 100644 index 000000000..57ac79179 --- /dev/null +++ b/spec/schemas_spec.rb @@ -0,0 +1,73 @@ +require "rspec" +require_relative "../src/ast/schemas" + +RSpec.describe Schemas do + describe ".as_resource_schema" do + it "returns nil for non-resource hashes" do + expect(Schemas.as_resource_schema({ a: 1 })).to be_nil + end + + it "uses :fields when explicitly present" do + rs = Schemas.as_resource_schema( + kind: :resource, + fields: { "x" => :Int64 }, + close_zig: "{0}.close()", + ) + expect(rs).to be_a(Schemas::ResourceSchema) + expect(rs.fields).to eq({ "x" => :Int64 }) + expect(rs.close_zig).to eq("{0}.close()") + end + + it "falls back to non-Symbol entries when :fields is absent" do + # Mirrors what visit_ExternStructDecl produces (annotator.rb:657-666): + # bare string-keyed field entries plus :kind/:close_zig/:extern_module + # symbol-keyed metadata. The fallback at schemas.rb:140 must extract + # only the string-keyed entries as fields. + rs = Schemas.as_resource_schema( + "value" => :JsonRecord, + :kind => :resource, + :close_zig => "{0}.deinit()", + :extern_module => "std.json", + ) + expect(rs).to be_a(Schemas::ResourceSchema) + expect(rs.fields).to eq({ "value" => :JsonRecord }) + expect(rs.close_zig).to eq("{0}.deinit()") + expect(rs.extern_module).to eq("std.json") + end + + end + + describe ".as_struct_schema" do + it "returns nil for kind: :union hashes" do + expect(Schemas.as_struct_schema({ kind: :union, variants: {} })).to be_nil + end + + it "uses :fields when explicitly present" do + ss = Schemas.as_struct_schema(fields: { "a" => :Int64 }) + expect(ss).to be_a(Schemas::StructSchema) + expect(ss.fields).to eq({ "a" => :Int64 }) + end + + it "falls back to non-Symbol entries when :fields is absent" do + # Same bare-key shape as as_resource_schema: visit_ExternStructDecl + # also feeds this path for non-resource extern structs. + ss = Schemas.as_struct_schema( + "id" => :Int64, + "data" => :"Int64[]", + :extern_module => "std.json", + ) + expect(ss).to be_a(Schemas::StructSchema) + expect(ss.fields).to eq({ "id" => :Int64, "data" => :"Int64[]" }) + expect(ss.extern_module).to eq("std.json") + end + + it "accepts kind: :resource (lets resources also be viewed as structs)" do + ss = Schemas.as_struct_schema( + kind: :resource, + fields: { "fd" => :Int64 }, + ) + expect(ss).to be_a(Schemas::StructSchema) + expect(ss.fields).to eq({ "fd" => :Int64 }) + end + end +end diff --git a/spec/transpiler_spec.rb b/spec/transpiler_spec.rb index 6ea7edbbd..89154f312 100644 --- a/spec/transpiler_spec.rb +++ b/spec/transpiler_spec.rb @@ -9,6 +9,165 @@ def transpile(src) ZigTranspiler.new.transpile(src) end + describe "MATCH-AS borrow escape (memory safety)" do + # Bug: returning a borrow extracted via `MATCH v AS s` from a function + # produces a use-after-free. The caller's cleanup-defer for the value + # backing `s` fires AFTER the slice is captured into the return value + # but BEFORE the function actually returns. The returned slice points + # into freed memory -> segfault / corruption (see MAL "tagged pair car" + # repro and transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht). + # + # Fix sketch (annotator.rb): + # 1. line 1733-1737: when binding `Value.Variant AS s` and the payload + # is non-Copy (String / slice / @indirect / container), also set + # `current_scope.locals[binding].non_escaping = true`. + # 2. lines 2242-2250: the existing GetField/GetIndex chain rejection + # is gated on `@with_block_depth > 0`. Lift the gate so the + # `non_escaping` return check fires unconditionally; WITH aliases + # keep their specific error code, MATCH-AS borrows get a generic + # RETURN_OF_BORROWED_PAYLOAD. + it "rejects returning a String borrowed from a MATCH-AS binding" do + src = <<~CLEAR + UNION Value { Number: Float64, Symbol: String } + + FN stringOf(v: Value) RETURNS String -> + PARTIAL MATCH v START + Value.Symbol AS s -> RETURN s;, + DEFAULT -> RETURN ""; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow/i) + end + + it "allows returning a deep-copy (COPY) of the borrowed payload" do + # `COPY` can allocate, so the function must declare it can fail. + src = <<~CLEAR + UNION Value { Number: Float64, Symbol: String } + + FN stringOf(v: Value) RETURNS !String -> + PARTIAL MATCH v START + Value.Symbol AS s -> RETURN COPY s;, + DEFAULT -> RETURN ""; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.not_to raise_error + end + + it "rejects capturing an IF-AS borrow into a BG block (fiber UAF)" do + # The BG block may outlive the enclosing function frame; capturing an + # IF-AS borrow whose source lives on that frame is a fiber-stack UAF. + # When LEND lands this becomes the explicit opt-in escape hatch + # (e.g. `BG @lent(s) { ... }`). + src = <<~CLEAR + FN spawn!() RETURNS Void -> + maybe: ?String = "Ok"; + IF maybe AS s THEN + t = BG { print(s); }; + NEXT t; + END + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow|capture|UAF|fiber/i) + end + + it "rejects returning a field of a MATCH-AS borrow when the field is non-Copy" do + src = <<~CLEAR + STRUCT Pair { left: String, right: String } + UNION Tagged { Empty, P { p: Pair } } + + FN leftOf(t: Tagged) RETURNS String -> + PARTIAL MATCH t START + Tagged.P AS pp -> RETURN pp.p.left;, + DEFAULT -> RETURN ""; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow/i) + end + + it "rejects returning an indexed access of a MATCH-AS borrow (RETURN_INDEX_OF_BORROW)" do + # MATCH-AS bindings are non_escaping for non-Copy payloads. A GetIndex + # rooted at one is also non_escaping — must be rejected. Use a List + # payload (`@list`) since plain slices are treated as Copy. + src = <<~CLEAR + UNION Bag { Empty, Items: String[]@list } + + FN firstOf(b: Bag) RETURNS String -> + PARTIAL MATCH b START + Bag.Items AS arr -> RETURN arr[0];, + DEFAULT -> RETURN ""; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow/i) + end + + it "rejects returning a FOR-EACH loop variable (non-Copy element)" do + # FOR x IN coll binds x to a borrow of each element. If the element + # type is non-Copy (here a struct holding a String), returning x + # leaks a slice into freed memory the moment `coll` is dropped. + src = <<~CLEAR + STRUCT Box { tag: String } + + FN findFirst(items: Box[]@list) RETURNS String -> + FOR b IN items DO + RETURN b.tag; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow/i) + end + + it "rejects returning a MATCH struct-destructure field (non-Copy)" do + # MATCH `Variant{a, b}` destructures the variant payload. The bound + # field names are borrows into the source — same UAF class as + # `Variant AS p -> RETURN p.a`. + src = <<~CLEAR + STRUCT Pair { left: String, right: String } + UNION Tagged { Empty, P { p: Pair } } + + FN leftOf(t: Tagged) RETURNS String -> + PARTIAL MATCH t START + Tagged.P { p } -> RETURN p.left;, + DEFAULT -> RETURN ""; + END + RETURN ""; + END + CLEAR + + expect { transpile(src) }.to raise_error(/borrow/i) + end + + it "rejects returning an indexed access of a WITH-scoped alias (RETURN_INDEX_FROM_WITH_SCOPED)" do + # WITH aliases are non_escaping. The same GetIndex chain check fires + # inside WITH but uses the WITH-specific error code so the diagnostic + # can mention the lock. The fn name needs `!` because it mutates the + # locked map. + src = <<~CLEAR + FN at!(MUTABLE m: HashMap) RETURNS String -> + WITH EXCLUSIVE m AS h { + RETURN h["k"]; + } + END + CLEAR + + expect { transpile(src) }.to raise_error(/locked|WITH|borrow/i) + end + end + describe "BG promise capture regressions" do it "allows footguns/06-style consumer BG to NEXT a producer promise captured from the same scope" do pending("MIR capture classification currently refuses captured Promise handles as unclassified_capture") diff --git a/src/annotator.rb b/src/annotator.rb index e2877eb0d..edf5e5054 100644 --- a/src/annotator.rb +++ b/src/annotator.rb @@ -1400,7 +1400,8 @@ def visit_IfStatement(node) end # Walk through field/index chains to the root binding. Used to determine - # whether an IF-AS source borrows from a non_escaping binding. + # whether an IF-AS source borrows from a non_escaping binding, and (more + # generally) whether the root of a RETURN chain is a non_escaping borrow. sig { params(expr: T.untyped).returns(T.nilable(AST::Identifier)) } def ifbind_source_root(expr) case expr @@ -1411,6 +1412,36 @@ def ifbind_source_root(expr) end end + # MATCH-AS / IF-AS / WHILE-AS bindings are *borrows* into the source value's + # payload. When the payload type carries heap-owned data (String, slice, + # @indirect, container), the binding's lifetime is bounded by the source's + # lifetime — escaping it via RETURN, storing it in an outer container, or + # capturing it into a BG / lambda / stream is a use-after-free. + # + # Two flags, two checks: + # - non_escaping → blocks RETURN of the binding (visit_ReturnNode) and + # storing it in containers (ensure_owned_value!). + # - borrowed_alias → blocks fiber/BG/lambda/stream capture + # (capabilities.rb has_non_escaping_capture). Without + # this flag a captured borrow becomes a fiber-stack + # UAF the moment the BG outlives the binding's source. + # + # Copy types (primitives, Id, rodata strings, all-Copy structs/unions) are + # exempt — copy semantics make the borrow self-contained. + # + # When LEND lands (TODO.md), it becomes the explicit escape hatch: + # `BG @lent(s) { ... }` will tell the compiler to verify the BG joins before + # the lender's scope ends. The default rejection here is what makes that + # opt-in safe: capture is rejected unless you've LENT it. + sig { params(entry: T.untyped, payload_type: T.untyped).void } + def mark_borrow_binding_non_escaping!(entry, payload_type) + return unless entry + ti = payload_type.is_a?(Type) ? payload_type : Type.new(payload_type) + return if ti.implicitly_copyable? { |t| lookup_type_schema(t) rescue nil } + entry.non_escaping = true + entry.borrowed_alias = true if entry.respond_to?(:borrowed_alias=) + end + sig { params(node: AST::IfBind).returns(Symbol) } def visit_IfBind(node) # Visit and validate each binding expression. @@ -1441,11 +1472,16 @@ def visit_IfBind(node) current_scope.declare(b[:name], nil, unwrapped, false, false, nil, :stack) entry = current_scope.locals[b[:name]] b[:symbol] = entry - # Propagate non_escaping when the source is borrow-derived from a - # non_escaping binding (a WITH alias or another transitive borrow - # of one). IF-AS on `p[i]` / `p.field` where `p` is the alias - # makes the new binding a borrow into locked data; it must not - # escape the enclosing WITH scope either. + # IF-AS unwraps an optional and binds the inner value. For non-Copy + # payloads the binding aliases data owned by the source (whether the + # source itself is owned or borrowed), so the binding's lifetime is + # bounded by the source's. Returning it or storing it in an outer + # container is a UAF. Universal rule — independent of whether the + # source is itself non_escaping. + mark_borrow_binding_non_escaping!(entry, unwrapped) + # Belt-and-suspenders: if the source is *itself* non_escaping (e.g. + # WITH alias) and somehow the payload is Copy, still propagate so + # transitive checks (container insertion) keep working. src_root = ifbind_source_root(b[:expr]) if src_root && src_root.symbol&.non_escaping entry.non_escaping = true @@ -1507,6 +1543,10 @@ def annotate_struct_pattern!(match_node, pat) field_type = field_type.is_a?(Type) ? field_type : Type.new(field_type) current_scope.declare(f[:name], nil, field_type, false, false, nil, :stack) og_declare(f[:name], nil, field_type) + # IF / MATCH struct destructure binds borrows into the source value's + # fields. Non-Copy field bindings inherit the source's lifetime. + @og[f[:name]]&.kind = :borrowed + mark_borrow_binding_non_escaping!(current_scope.locals[f[:name]], field_type) end else visit(f[:value]) @@ -1734,6 +1774,12 @@ def visit_MatchStatement(node) # MATCH TAKES: owned extraction - source is consumed. unless node.takes @og[c[:binding]]&.kind = :borrowed + # The binding aliases data inside the matched source. If the + # payload is non-Copy, returning it (or storing it in an + # outer container) would dangle once the source's cleanup + # fires. Universal rule, not WITH-specific. + bound_entry = current_scope.locals[c[:binding]] + mark_borrow_binding_non_escaping!(bound_entry, bound_entry&.type) if bound_entry&.type end end end @@ -1803,6 +1849,14 @@ def visit_MatchStatement(node) field_type = field_type.is_a?(Type) ? field_type : Type.new(field_type) current_scope.declare(f[:name], nil, field_type, false, false, nil, :stack) og_declare(f[:name], nil, field_type) + # MATCH `Variant{a, b}` destructure: each field binding is a + # borrow into the variant payload, parallel to `Variant AS p`. + # Without TAKES, the source is not consumed; non-Copy field + # bindings inherit the source's lifetime and must not escape. + unless node.takes + @og[f[:name]]&.kind = :borrowed + mark_borrow_binding_non_escaping!(current_scope.locals[f[:name]], field_type) + end end end end @@ -1967,6 +2021,12 @@ def visit_ForEach(node) current_scope.declare(node.var_name, nil, elem_sym, node.is_mutable == true, false, nil, :stack) node.symbol = current_scope.locals[node.var_name] classify_ownership!(node.symbol) + # FOR x IN coll binds x to a borrow of each element. The element's + # backing storage is owned by `coll`, not by the loop body — escaping + # x via RETURN, container-store, or BG capture is a UAF as soon as + # `coll` is mutated or freed. Universal rule, parallel to MATCH-AS. + @og[node.var_name.to_s]&.kind = :borrowed + mark_borrow_binding_non_escaping!(node.symbol, elem_sym) visit_stmts(node.body) finalize_scope(node) node.deferred_drops @@ -2084,6 +2144,10 @@ def visit_WhileBindLoop(node) entry = current_scope.locals[node.binding_name] classify_ownership!(entry) og_declare(node.binding_name.to_s, nil, unwrapped) + # WHILE-AS, like IF-AS / MATCH-AS, binds an unwrapped borrow into the + # condition's payload. For non-Copy payloads the binding's lifetime is + # bounded by the source — returning it dangles. + mark_borrow_binding_non_escaping!(entry, unwrapped) visit_stmts(node.do_branch) finalize_scope(node) @@ -2232,21 +2296,40 @@ def visit_ReturnNode(node) visit(node.value) - # RETURN inside a WITH block is forbidden ONLY when the returned value - # carries a borrow of the WITH alias (the `AS` binding). Pure values - # — primitives, fresh values returned by methods on the alias (e.g. - # `p.insert(...)` returning a fresh Id) — escape safely. The - # SymbolEntry#non_escaping flag is set on every WITH alias by - # declare_capability_scope!; it's the same flag ensure_owned_value! - # already uses to prevent storing WITH-scoped values in containers. - if (@with_block_depth || 0) > 0 - val = node.value - if val.is_a?(AST::Identifier) && val.symbol&.non_escaping + # Returning a borrow is unsafe regardless of WHICH construct introduced + # it (WITH alias, IF-AS, MATCH-AS, WHILE-AS, struct-with-borrowed-fields, + # observable, ...). The borrow's source goes out of scope when this + # function returns; the slice / pointer the caller receives points into + # freed memory. + # + # The universal rule: any RETURN whose value is rooted (via plain + # identifier, GetField chain, GetIndex chain) at a SymbolEntry with + # non_escaping=true is rejected. Inside a WITH block we use the older + # WITH-specific error codes for better diagnostics (they mention locks); + # outside WITH we use the generic RETURN_OF_BORROW family. + val = node.value + in_with = (@with_block_depth || 0) > 0 + # Walk arbitrarily deep GetField/GetIndex chains to the root identifier. + # `pp.p.left` is GetField{target: GetField{...}} — a single-level check + # would miss it; ifbind_source_root recurses through the whole chain. + root = ifbind_source_root(val) if val.is_a?(AST::GetField) || val.is_a?(AST::GetIndex) + if val.is_a?(AST::Identifier) && val.symbol&.non_escaping + if in_with error!(node, :RETURN_FROM_WITH_SCOPED, name: val.name, hint: "WITH aliases are borrows of locked data and cannot escape their scope.") - elsif val.is_a?(AST::GetField) && val.target.respond_to?(:symbol) && val.target.symbol&.non_escaping + else + error!(node, :RETURN_OF_BORROW, name: val.name) + end + elsif val.is_a?(AST::GetField) && root&.symbol&.non_escaping + if in_with error!(node, :RETURN_FIELD_FROM_WITH_SCOPED, hint: "Field access borrows from the locked data; the borrow cannot escape the WITH scope.") - elsif val.is_a?(AST::GetIndex) && val.target.respond_to?(:symbol) && val.target.symbol&.non_escaping + else + error!(node, :RETURN_FIELD_OF_BORROW, name: T.must(root).name) + end + elsif val.is_a?(AST::GetIndex) && root&.symbol&.non_escaping + if in_with error!(node, :RETURN_INDEX_FROM_WITH_SCOPED, hint: "Index access borrows from the locked data; the borrow cannot escape the WITH scope.") + else + error!(node, :RETURN_INDEX_OF_BORROW, name: T.must(root).name) end end promote_to_expr_if!(node, node.value) if node.value.is_a?(AST::IfStatement) @@ -4142,6 +4225,16 @@ def visit_BindVar(node) :stack ) + # Pipeline AS-binding (`coll AS $u`): $u aliases each element of `coll` + # (or, for a scalar source, the source itself). Either shape is a borrow + # into the LHS — non-Copy bindings inherit the LHS's lifetime and must + # not escape via RETURN, container-store, or BG capture. + bind_entry = current_scope.locals[var_name] + if bind_entry + @og[var_name]&.kind = :borrowed if @og + mark_borrow_binding_non_escaping!(bind_entry, binding_type) + end + # The result of the operation is the collection itself (passthrough for pipeline) node.full_type = lhs_type end diff --git a/src/ast/diagnostic_registry.rb b/src/ast/diagnostic_registry.rb index 4928b8d51..2c17d4f1a 100644 --- a/src/ast/diagnostic_registry.rb +++ b/src/ast/diagnostic_registry.rb @@ -1324,6 +1324,24 @@ module DiagnosticRegistry template: "Cannot RETURN an indexed access of a WITH-scoped binding. %{hint}", summary: "Indexed access on a WITH-scoped binding can't be returned (would outlive the WITH).", }, + RETURN_OF_BORROW: { + severity: :error, category: :escape, + template: "Cannot RETURN '%{name}' — it is a borrow whose source's lifetime ends with this scope.", + summary: "Returning a borrow (MATCH-AS / IF-AS / WHILE-AS / FOR-EACH binding, or a field/index chain rooted in one) lets the caller see a slice whose backing storage is freed when this function returns.", + fix_hint: "Use `RETURN COPY %{name}` to break the borrow, or restructure so the value is owned by this function before being returned.", + }, + RETURN_FIELD_OF_BORROW: { + severity: :error, category: :escape, + template: "Cannot RETURN a field of borrowed binding '%{name}'. The backing storage is freed when this function returns.", + summary: "A field of a borrowed binding inherits the borrow — returning it produces a use-after-free.", + fix_hint: "Wrap the field access in COPY (e.g. `RETURN COPY %{name}.field`).", + }, + RETURN_INDEX_OF_BORROW: { + severity: :error, category: :escape, + template: "Cannot RETURN an indexed access of borrowed binding '%{name}'. The backing storage is freed when this function returns.", + summary: "An indexed access on a borrowed binding inherits the borrow — returning it produces a use-after-free.", + fix_hint: "Wrap the indexed access in COPY (e.g. `RETURN COPY %{name}[i]`).", + }, # Function calls INTRINSIC_NO_OVERLOAD: { diff --git a/transpile-tests/119_hashmap_string_readback.cht b/transpile-tests/119_hashmap_string_readback.cht index 3ad4eee7f..e80d760ac 100644 --- a/transpile-tests/119_hashmap_string_readback.cht +++ b/transpile-tests/119_hashmap_string_readback.cht @@ -5,7 +5,7 @@ UNION Value { Nil, Str: String } FN readStrDirect!(MUTABLE map: HashMap) RETURNS !String -> val = map["t0"] OR Value.Nil; PARTIAL MATCH val START - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; diff --git a/transpile-tests/160_temp_value_no_cleanup.cht b/transpile-tests/160_temp_value_no_cleanup.cht index f9201b6ec..8dc5f2003 100644 --- a/transpile-tests/160_temp_value_no_cleanup.cht +++ b/transpile-tests/160_temp_value_no_cleanup.cht @@ -8,9 +8,9 @@ UNION Val { Lambda { params: Val[], body: Val @indirect, envId: Int64 } } -FN getStr(v: Val) RETURNS String -> +FN getStr(v: Val) RETURNS !String -> PARTIAL MATCH v START - Val.Str AS s -> RETURN s;, + Val.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN "nil"; END RETURN "nil"; @@ -22,7 +22,7 @@ FN makeLambda!() RETURNS !Val -> RETURN Val.Lambda{ params: COPY params, body: Val{ Str: "body" }, envId: 0 }; END -FN wrapStr!(v: Val) RETURNS String -> +FN wrapStr!(v: Val) RETURNS !String -> RETURN getStr(v); END diff --git a/transpile-tests/162_return_borrow_from_hpt.cht b/transpile-tests/162_return_borrow_from_hpt.cht index 4da505441..52eb95dd9 100644 --- a/transpile-tests/162_return_borrow_from_hpt.cht +++ b/transpile-tests/162_return_borrow_from_hpt.cht @@ -7,9 +7,9 @@ UNION Val { Lambda { params: Val[], body: Val @indirect, envId: Int64 } } -FN getStr(v: Val) RETURNS String -> +FN getStr(v: Val) RETURNS !String -> PARTIAL MATCH v START - Val.Str AS s -> RETURN s;, + Val.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN "nil"; END RETURN "nil"; @@ -32,7 +32,7 @@ FN makePure() RETURNS Val -> RETURN Val.Nil; END -FN wrap(v: Val) RETURNS String -> +FN wrap(v: Val) RETURNS !String -> RETURN getStr(v); END diff --git a/transpile-tests/174_union_match_struct_fields.cht b/transpile-tests/174_union_match_struct_fields.cht index 04906e0b2..32f0ddb33 100644 --- a/transpile-tests/174_union_match_struct_fields.cht +++ b/transpile-tests/174_union_match_struct_fields.cht @@ -24,8 +24,8 @@ END FN prStr(v: Value) RETURNS !String -> PARTIAL MATCH v START Value.Nil -> RETURN "nil";, - Value.Str AS s -> RETURN s;, - Value.Wrapped AS w -> RETURN w.inner;, + Value.Str AS s -> RETURN COPY s;, + Value.Wrapped AS w -> RETURN COPY w.inner;, Value.Error AS e -> RETURN "error:" + e.errMsg; END RETURN ""; diff --git a/transpile-tests/175_tco_try_catch.cht b/transpile-tests/175_tco_try_catch.cht index db3b518a8..1a198fe37 100644 --- a/transpile-tests/175_tco_try_catch.cht +++ b/transpile-tests/175_tco_try_catch.cht @@ -9,17 +9,17 @@ UNION Value { Tco { tcoAst: Value @indirect, tcoEnv: Int64 }, } -FN getStr(v: Value) RETURNS String -> +FN getStr(v: Value) RETURNS !String -> PARTIAL MATCH v START - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; END -FN getSymName(v: Value) RETURNS String -> +FN getSymName(v: Value) RETURNS !String -> PARTIAL MATCH v START - Value.Symbol AS s -> RETURN s;, + Value.Symbol AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; @@ -84,7 +84,7 @@ END FN prStr(v: Value, readably: Bool) RETURNS !String -> PARTIAL MATCH v START Value.Nil -> RETURN "nil";, - Value.Str AS s -> IF readably THEN RETURN "\"" + s + "\""; END RETURN s;, + Value.Str AS s -> IF readably THEN RETURN "\"" + s + "\""; END RETURN COPY s;, Value.Error AS e -> RETURN "error:" + e.errMsg;, DEFAULT -> RETURN "?"; END diff --git a/transpile-tests/176_hashmap_union_dupe.cht b/transpile-tests/176_hashmap_union_dupe.cht index cec5ab64a..9b554b4b0 100644 --- a/transpile-tests/176_hashmap_union_dupe.cht +++ b/transpile-tests/176_hashmap_union_dupe.cht @@ -15,7 +15,7 @@ FN prStr(v: Value) RETURNS !String -> PARTIAL MATCH v START Value.Nil -> RETURN "nil";, Value.Number AS n -> RETURN n.toString();, - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, Value.List -> RETURN "(list)"; END RETURN ""; diff --git a/transpile-tests/179_hashmap_structlit_no_double_dupe.cht b/transpile-tests/179_hashmap_structlit_no_double_dupe.cht index b4613d22c..91e95e60e 100644 --- a/transpile-tests/179_hashmap_structlit_no_double_dupe.cht +++ b/transpile-tests/179_hashmap_structlit_no_double_dupe.cht @@ -4,9 +4,9 @@ UNION Value { Nil, Str: String, Number: Float64 } -FN getStr(v: Value) RETURNS String -> +FN getStr(v: Value) RETURNS !String -> PARTIAL MATCH v START - Value.Str AS s -> RETURN s;, + Value.Str AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END RETURN ""; diff --git a/transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht b/transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht new file mode 100644 index 000000000..a3927e6f8 --- /dev/null +++ b/transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht @@ -0,0 +1,52 @@ +# XFAIL — known UAF that bypasses the MIR checker. +# +# Build/run: ./clear test transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht +# Expect: SEGFAULT or corrupted print output (e.g., `got [ÿÿ]`). +# +# Bug: `MATCH v AS s` binds `s` as a borrow into the variant payload owned by +# the caller of `stringOf`. Returning `s` hands a slice back to the caller of +# `getName!`. Inside `getName!`, `result`'s cleanup-defer fires AFTER `out` is +# captured but BEFORE the function actually returns; the slice in `out` then +# points into freed memory. +# +# Why the static checker misses it: +# 1. src/annotator.rb:1733-1737 marks MATCH-AS bindings :borrowed in the +# OwnershipGraph but does NOT set SymbolEntry#non_escaping=true. +# 2. src/annotator.rb:2242 gates the existing "return-of-borrow" rejection +# on `@with_block_depth > 0`, so the check only fires inside WITH blocks. +# +# Fix sketch: +# - Set non_escaping=true on MATCH-AS bindings when the payload is non-Copy +# (String/slice/@indirect/container). +# - Lift the WITH-only gate so the existing GetField/GetIndex chain check +# fires unconditionally. The check at lines 2244-2249 already handles +# `RETURN s`, `RETURN s.field`, `RETURN s[i]` — exactly the shapes we +# need to reject. +# +# Once both fixes land, this file should compile-error with something like +# RETURN_OF_BORROWED_PAYLOAD instead of segfaulting at runtime. + +UNION Value { + Number: Float64, + Symbol: String +} + +FN stringOf(v: Value) RETURNS String -> + PARTIAL MATCH v START + Value.Symbol AS s -> RETURN s;, + DEFAULT -> RETURN ""; + END + RETURN ""; +END + +FN getName!() RETURNS !String -> + result: Value = Value{ Symbol: COPY "Ok" }; + out = stringOf(result); + RETURN out; +END + +FN main() RETURNS Void -> + s = getName!(); + print("got [" + s + "]"); + ASSERT s == "Ok", "borrowed string returned across cleanup boundary"; +END From e112cd498886d9baf0fc52e75afe5260be8b5023 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 12:29:58 +0000 Subject: [PATCH 09/21] feat(fuzz): add tools/fuzz combinatoric harness with 5 templates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Template-based program generator that drives .cht source through `./clear test` end-to-end, exercising the MIR static checker (9 invariants) and runtime leak detection (`std.testing.allocator`). Templates (90 active cells, 30 reserved :in_dev for unlanded features): - escape_via_return (18) — E2 :always_returned, :heap_ptr_return - loop_carry_collection (8) — E2 :loop_carry_string + frame-rewind - mutable_collection_param (8) — INV-CROSS-FRAME-PARAM-ALLOC - nested_loop_escape (8) — loop-local list/map escape (commit 9fa21926) - stream_into_boundary (48) — NEXT × {BG, DO, BG STREAM} × ownership/sync/move/value - lifetimed_return (6) — bg_lifetime_sources stamping → enforcement check Cells carry an `expected:` annotation (:pass / :compile_error / :in_dev) so the matrix accommodates documented capability boundaries (e.g., DO + @local) and unlanded features (LEND, @atomic BG-body capture) without losing cell count. Findings on the current tree: - 1 leak: (bg_stream, local, copy, String) — heap addresses leak after BG STREAM consumer with WHILE TRUE and outer-scope String COPY. - 2 UNEXPECTED-PASS: BG handle capturing @local can RETURN or be stored in a heap struct field. Compiler accepts; runtime crashes with SIGABRT. Real UAF surface — bg_lifetime_sources stamps but doesn't enforce these cases. - 5 MIR-FAIL: phase B outstanding work (BG-body atomic auto-load, edge case in BG+versioned+copy). docs/agents/formal-verification-testing.md inventories the 12 testing layers, what each covers, what's intentionally not covered (with reasons), and a ranked TODO of 13 combinatoric sets to add. Folds in takeaways from an older draft at ~/manual/clear/docs/agents/. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 4 + docs/agents/formal-verification-testing.md | 357 ++++++++++++++++++ tools/fuzz/README.md | 157 ++++++++ tools/fuzz/generator.rb | 50 +++ tools/fuzz/run.rb | 156 ++++++++ tools/fuzz/templates/escape_via_return.rb | 68 ++++ tools/fuzz/templates/lifetimed_return.rb | 155 ++++++++ tools/fuzz/templates/loop_carry_collection.rb | 59 +++ .../templates/mutable_collection_param.rb | 52 +++ tools/fuzz/templates/nested_loop_escape.rb | 69 ++++ tools/fuzz/templates/stream_into_boundary.rb | 258 +++++++++++++ 11 files changed, 1385 insertions(+) create mode 100644 docs/agents/formal-verification-testing.md create mode 100644 tools/fuzz/README.md create mode 100644 tools/fuzz/generator.rb create mode 100755 tools/fuzz/run.rb create mode 100644 tools/fuzz/templates/escape_via_return.rb create mode 100644 tools/fuzz/templates/lifetimed_return.rb create mode 100644 tools/fuzz/templates/loop_carry_collection.rb create mode 100644 tools/fuzz/templates/mutable_collection_param.rb create mode 100644 tools/fuzz/templates/nested_loop_escape.rb create mode 100644 tools/fuzz/templates/stream_into_boundary.rb diff --git a/.gitignore b/.gitignore index 2e839fd7f..527cb2c83 100644 --- a/.gitignore +++ b/.gitignore @@ -125,6 +125,10 @@ transpile-tests/[0-9]* !transpile-tests/[0-9]*.cht.disabled !transpile-tests/[0-9]*/ +# Generated fuzz programs (see tools/fuzz/). Promoted regressions move into +# transpile-tests/ proper; the fuzz/ dir holds only ephemeral output. +transpile-tests/fuzz/*.cht + # Catch-all **/*.profile/ **/.zig-cache/ diff --git a/docs/agents/formal-verification-testing.md b/docs/agents/formal-verification-testing.md new file mode 100644 index 000000000..dd17bb6eb --- /dev/null +++ b/docs/agents/formal-verification-testing.md @@ -0,0 +1,357 @@ +# Formal-Verification & Testing Coverage + +What testing layers exist, what each covers, what they explicitly don't, +and what gaps still need covering. This is the source of truth for +"is this case tested?" questions and the TODO list for new tests. + +The "formal-verification" in the title is aspirational. CLEAR has no +mechanized proof checker. What it has is: + +1. A **9-invariant MIR static checker** (a quasi-formal verification of + ownership) that fires on every `./clear build`. +2. A **borrow / move / non-escaping checker** in the annotator. +3. **Sorbet** static typing on the Ruby compiler itself. +4. **Combinatoric fuzz** templates that stress cross-products at the + end-to-end layer. + +That stack is what this doc inventories. An older draft of this doc +(in a sibling repo, `~/manual/clear/docs/agents/formal-verification-testing.md`) +sketched 7 TODO areas before the fuzz harness existed. This revision +folds in those TODOs that survived, drops the ones now closed, and +renames the two combinatoric sets per the language they belong in: + +- **Access-gate combinatoric set** — escape attempts through WITH / + alias / permission boundaries. (Was "WITH-block escape matrix" / + "Escape × Permission × Alias-Kind matrix".) +- **Execution-boundary combinatoric set** — what can and can't cross + DO / BG / CONCURRENT, with and without `@parallel`. (Was + "Concurrency × Ownership matrix".) + +## Testing layers + +| # | Layer | Location | Granularity | Oracle | +|---|---|---|---|---| +| 1 | Parser unit specs | `spec/*_parser_spec.rb` (~140 files) | AST shape per source string | RSpec asserts on AST attrs | +| 2 | Annotator unit specs | `spec/*_annotator_spec.rb`, `spec/atomic_*_spec.rb`, etc. (~130 files) | symbol table / type info per source | RSpec asserts on `entry.sync`, `type_info`, etc. | +| 3 | MIR pass specs | `spec/mir_*_spec.rb`, `spec/affine_ownership_spec.rb`, etc. (~60 files) | MIR node shape, dataflow results | RSpec asserts on MIR node trees | +| 4 | MIR static checker | `src/mir/mir_checker.rb` (9 invariants) | every `./clear build` | raises on violation pre-codegen | +| 5 | Transpiler emit specs | `spec/*_emitter_spec.rb`, `spec/test_framework_spec.rb`, `spec/polymorphic_transaction_acceptance_spec.rb` | string-grep emitted Zig | RSpec `expect(zig).to include(...)` | +| 6 | transpile-tests | `transpile-tests/*.cht` (~447 files) | end-to-end per source | `zig test` + `std.testing.allocator` | +| 7 | Module integration | `transpile-tests/module-integration/` | cross-module compile + run | `zig build test` | +| 8 | FFI integration | `transpile-tests/ffi-integration/` | extern fn boundary | `zig build test` | +| 9 | Combinatoric fuzz | `tools/fuzz/` (5 templates, ~48 active cells) | end-to-end per cell | compile + run + leak | +| 10 | Concurrency stress (zig) | `zig/runtime/*_test.zig` and Loom/Hammer/VOPR per CLAUDE.md | runtime primitives | TSan, leak detector, deterministic VOPR | +| 11 | Sorbet | `# typed: true` files under `src/` | Ruby type signatures | `srb tc` | +| 12 | Benchmarks | `benchmarks/runner.rb` | wall-time, throughput | not regression-gated; reports only | + +Effective coverage flow: + + .cht source + │ + ├─ Parser ───── unit specs (1) + ├─ Annotator ── unit specs (2) + borrow/move/non-escaping checker + ├─ MIR lower ── unit specs (3) + MIR checker (4) (statically rules out 9 invariants) + ├─ Transpiler ─ string-grep specs (5) (lowering choice) + └─ Zig output ─ transpile-tests (6) + fuzz (9) (compiles + runs + no leak) + +## What's well-tested (with confidence rating) + +| Area | How tested | Confidence | +|---|---|---| +| Parser grammar | Unit specs + transpile-tests | High | +| Type inference | Unit specs | High | +| MIR leak / orphan / mismatch | MIRChecker (structural, 9 invariants) | High | +| WITH block lowering (per alias kind, in isolation) | transpile-tests + emit specs | Medium | +| Frame vs heap allocation | transpile-tests (Zig safety + leak detector) | Medium-High | +| Concurrency primitives (BG, DO, BG STREAM, CONCURRENT in isolation) | transpile-tests + zig runtime tests | Medium | +| Concurrency race-freedom (atomics) | Loom + Hammer + VOPR | Medium | +| Error propagation (TRY / CATCH / SMOOTH) | transpile-tests | Medium | +| Polymorphic-sync dispatch *path selection* | `polymorphic_transaction_acceptance_spec.rb` (string-grep emitted Zig) | Medium | +| **Loop-local collection escape** | `nested_loop_escape` fuzz template + `loop_frame_analysis_spec.rb` | High (after commit 9fa21926) | +| **Function-boundary escape** | `escape_via_return` fuzz template | Medium-High | + +### What the 9 MIR invariants prove + +If a program passes the checker, these structural properties hold: + +1. Every `AllocMark` has a matching `Cleanup`/`ErrCleanup`/`TransferMark` (no leak). +2. Every cleanup has a matching `AllocMark` (no orphan cleanup). +3. AllocMark allocator (`:heap`/`:frame`) matches cleanup allocator. +4. Heap-returning call in statement position is bound (HPT_LEAK). +5. `InlineZig` / `RawZig` calling `CheatLib.*` declares `stdlib_def`. +6. InlineZig allocator symbols match container's AllocMark. +7. Loop bodies that frame-allocate carry per-iteration `restoreLoopMark` defer. +8. Cleanup for primitives / `Id` (no heap ownership) is rejected as a compiler bug. +9. Pointer-passed mutable params cannot be frame-allocated (UAF defense, INV-CROSS-FRAME-PARAM-ALLOC). + +The older draft cited 7 invariants; #8 and #9 were added later. + +## What's NOT covered (intentionally) + +These cases are deliberately outside this stack's scope, with reasons: + +| Case | Where it lives instead | Why | +|---|---|---| +| AST node attribute correctness | RSpec unit specs | Need internal-state assertions (`with_node.lock_error_clause[:retries] == 2`); fuzz can't see internals | +| Specific dispatch path chosen | Transpiler string-grep specs | Same — fuzz only sees "compiles + runs"; can't tell `.acquire()` vs `Versioned.update` was emitted | +| Parser error message wording | RSpec unit specs | Diagnostic-text precision matters; fuzz oracle is too coarse | +| Wall-time performance | `benchmarks/runner.rb` | Not regression-gated; would be flaky in CI; uses statistical comparison | +| Race-condition correctness in runtime | Loom / VOPR / Hammer | Need TSan instrumentation + deterministic seeds; CLEAR program fuzzing won't shake them out | +| Stack-overflow detection | `clear --stack-check` (objdump) | Per-function stack tier verification; needs LLVM machine pass, separate from ownership | +| Stdlib (CheatLib) function semantics | Zig unit tests in `zig/runtime/` | Registry-driven; integration tests cover the CLEAR-side wiring; modeling Zig semantics formally is out of scope | + +If a test belongs in one of these categories, **don't move it to the +fuzz harness** — the oracle granularity is wrong. See the +"Polymorphic-sync migration analysis" section below for one example. + +## What's NOT covered, but should be (TODO) + +Ranked by bang-for-buck. Each is a candidate for a new fuzz template +unless tagged otherwise. + +### High priority + +#### 1. Access-gate combinatoric set (WITH / alias / permission escape) + +The CLAUDE.md non-escaping rule on WITH aliases is uniformly enforced +by one flag (`SymbolEntry#non_escaping`). One uniform check is good +design, but coverage is sparse — a normalization bug in any single +escape-form checker would slip through. + +**Matrix**: +- 5 alias kinds (`EXCLUSIVE`, `VIEW`, `RESTRICT`, `BORROWED`, `SNAPSHOT`) +- × 5 permission types (`@locked`, `@writeLocked`, `@shared`, `@multiowned`, `@local`) +- × 6 escape forms (`RETURN alias`, `RETURN alias.field`, BG capture, + TAKES consumption, store-into-heap-struct-field, list append) +- = **150 cells**, ~80% negative (must reject; `RETURN COPY alias` + is the legal exception). + +**Status**: not started. **Cross-references**: `docs/agents/mir-bugs.md` #3 +(WITH RESTRICT reassignment UAF); CLAUDE.md "Key rule: WITH ... AS alias +aliases are non-escaping". + +#### 2. Execution-boundary combinatoric set (DO / BG / CONCURRENT × @parallel) + +What can and can't cross an execution boundary, with and without +`@parallel`. The `@parallel` modifier enforces stricter rules than +plain BG / DO: + +- `@local` rejected (per-scheduler affinity broken by work-stealing) +- `@multiowned` rejected (non-atomic refcount unsafe across schedulers) +- `@arena` rejected (arena is per-scheduler) +- `@pinned` BG inside `@pinned` scope must itself be `@pinned` + +The error catalog is in `src/ast/diagnostic_registry.rb`. Each +diagnostic should fire on at least one combinatoric cell. + +**Matrix**: +- 4 boundary forms (`BG`, `DO`, `BG STREAM`, `CONCURRENT EACH`/`WHERE`/`SELECT`) +- × 3 modifiers (none, `@parallel`, `@pinned`) +- × 5 ownership (`@local`, `@shared`, `@multiowned`, `@arena`, `@indirect`) +- × 4 sync wrappers for `@shared` (`@locked`, `@writeLocked`, `@atomic`, + `@versioned`) +- × 5 move modes (borrow, GIVE, COPY, CLONE, LEND-when-landed) +- × 3 value types (primitive, String, list) +- = **~700-900 cells** before pruning illegal combos. Realistic + exhaustive after pruning: **~250-400 active**, ~30% negative. + +The current `stream_into_boundary` template covers a depth-1 slice +(BG STREAM × {BG, DO, BG STREAM} × @local × {borrow, copy} × {int, +string} = 12 active cells, 90 reserved as `:in_dev`). This TODO is +its expansion to the full matrix. + +**Status**: scaffolding done; `@shared` wrapping renderer + `@parallel` ++ CONCURRENT not yet emitted. **Cross-references**: TODO-7 in the older +doc (Concurrency × Ownership Interaction); CLAUDE.md "Concurrency +Model"; recent commits adding `@parallel` diagnostics. + +#### 3. P3.3 / P3.4 / P3.5 lock-safety under nesting + +The three concurrency-static checks are tested in isolation: + +- **P3.3**: hold-lock-across-yield (forbid holding a lock across `:yield`) +- **P3.4**: naked nested-WITH (forbid unranked re-acquire) +- **P3.5**: compile-time reentrant lock (forbid recursive lock acquire + on same binding without `@reentrant`) + +Their interaction under nested permission combinations +(`EXCLUSIVE @writeLocked` inside `DO {}` inside `WITH EXCLUSIVE @locked` +etc.) is not systematically covered. A nested permission chain could +satisfy each check individually but compose into a violation. + +**Matrix**: 3 checks × ~6 nesting depths × 5 permissions = ~90 cells. +**Status**: not started. **Cross-references**: TODO-2 in older doc. + +#### 4. Error-path × allocator identity (INV-9) + +Programs with `try/catch` / `OrRescue` fallbacks where the error path +returns data from a different allocator than the success path. INV-9 +in CLAUDE.md says "error paths preserve allocator identity" — one of +the 11 memory-safety invariants. Listed in `docs/agents/mir-bugs.md` #7 +as a known fragility (`@pending_or_fallback_dupe` flag). + +**Matrix**: ~80 cells (collection × 4 error-handling forms × 5 cleanup +positions). **Status**: not started. + +### Medium priority + +#### 5. Polymorphic-sync end-to-end (complementary, not replacement) + +Existing specs (`spec/sync_polymorphism_integration_spec.rb`, +`spec/polymorphic_transaction_acceptance_spec.rb`) verify *which* +dispatch was chosen via Zig string-grep. They don't verify the chosen +dispatch runs leak-free. A `polymorphic_sync_e2e` template +(~50-100 cells) would add the runtime oracle. + +**Status**: not started. See "Polymorphic-sync migration analysis" +below for why this complements rather than replaces the existing specs. + +#### 6. Cross-module ownership + +Imported `RETURNS %T` functions called from another module. Bugs +`40d97b1e` and `5ffc1ed0` were both in this area (importer-side +provenance loss). `transpile-tests/module-integration/` covers happy +paths only. + +**Status**: not started. Requires multi-file generation. Cross-references: +TODO-6 in older doc. + +#### 7. EscapeAnalysis phase-composition fuzzer + +`EscapeAnalysis` has three phases (E1: heap_return_fns, E2: +per-declaration scan, E3: call-site tagging) whose outputs feed each +other. There is no test that proves the E1 → E3 → E2 composition is +correct for all declaration kinds (local, param, field, BG capture). +A complex function body (nested BG, BG inside WITH, DO inside loop) +might leave a frame-allocated value un-upgraded → silent UAF in +emitted Zig (caught only by leak/UAF at runtime, not statically). + +The current end-to-end fuzz harness catches *outcomes* (UAF / leak) +of phase-composition bugs but doesn't directly stress the phase +boundaries. A *unit-level* property fuzzer that generates random +function bodies and asserts `EscapeAnalysis` always stamps +`storage = :heap` on every transitively-escaping declaration would +catch these earlier and pinpoint the failing phase. + +**Status**: not started. **Cross-references**: TODO-3 in older doc. +This is unit-level, so probably belongs in `spec/`, not `tools/fuzz/`. + +#### 8. Frame-arena overflow stress + +Large structs in deep loops, recursive call chains, nested `BG` +frames. Bugs `295d7b2b` and `b4b9da8a` came from this area. + +**Status**: not started. + +#### 9. MATCH TAKES variant payload + +`docs/agents/mir-bugs.md` #2: matched variant without `AS` binding +leaks the payload. Not specifically templated. + +**Matrix**: ~30 cells (union shape × variant-with-payload × MATCH form). +**Status**: not started. + +### Low priority (or in_dev / blocked) + +#### 10. LEND poisoning negative tests + +Keyword unimplemented (`TODO.md:41`). Fuzz cells already reserved as +`:in_dev` in `stream_into_boundary`. When LEND lands, flip cells from +`:in_dev` to `:compile_error` for escape-attempt patterns. + +**Status**: scaffolding ready; blocked on language feature. + +#### 11. MIRChecker invariant completeness (mechanized proof) + +The 9 invariants are enforced, but there is no written or mechanized +proof that they are *sufficient* — i.e., that a program satisfying +all 9 cannot have a UAF, double-free, or leak. The mapping from +failure mode → invariant(s)-preventing-it is implicit in CLAUDE.md. + +**Approach**: +- Short-term: write the explicit failure-mode → invariant mapping. + Makes gaps visible by inspection. +- Long-term (stretch): mechanize in Lean 4 / Coq for the subset of + MIR that handles frame/heap alloc + drop + move. MIR is small + enough that this is bounded work, not open-ended research. + +**Status**: not started. Cross-references: TODO-4 in older doc. + +#### 12. Sorbet nil-kill completion + +Large portions of `annotator.rb`, `parser.rb`, `scope.rb` have +`T.untyped` returns or missing ivar `T.let` declarations. Type errors +in those paths are invisible to Sorbet. Already in progress on a +nil-kill branch. + +**Status**: in progress (separate workstream). Cross-references: +TODO-5 in older doc. Not a fuzz concern. + +#### 13. Stack-size verification combinatorics + +The `clear --stack-check` flag verifies per-function stack stays +within tier limits via objdump. Combinatoric stress would catch +unintended frame growth. + +**Status**: not started. Probably belongs in a separate harness, not +the fuzz harness — different oracle (objdump output, not run+leak). + +## Polymorphic-sync migration analysis + +Asked-and-answered: should the ~2,250 lines of polymorphic-sync specs +move to `tools/fuzz/`? + +**No.** They assert at three layers the fuzz harness cannot see: + +| Layer | Example | Fuzz can replace? | +|---|---|---| +| Parser AST | `ast.requires == {"c" => Set[:LOCKED]}` | No | +| Annotator state | `with_node.lock_error_clause[:retries] == 2` | No | +| Lowering choice | `zig.include?(".acquire()") && !include?("Versioned.update")` | **No — critical.** Both dispatch paths might run correctly under the wrong selection and leak no memory; the unit spec is the only place this is caught. | + +Churn signal: 6 commits in 6 months across 11 files. Not a maintenance +burden. No complexity-reduction win available. + +**Complementary move** (not migration): adding `polymorphic_sync_e2e` +(TODO #5 above) catches a different class of bug — runtime-contract +violations under specific sync wrappers — that the unit specs miss +because they stop at Zig string-grep. + +## Workflow when adding a new fuzz template + +1. Identify the invariant or escape case to stress (cite the source — + a recent commit, a `mir-bugs.md` entry, a CLAUDE.md invariant, or + a TODO above). +2. Drop a file under `tools/fuzz/templates/`. Auto-loaded. +3. Declare the parameter cells (the matrix). Cells get an `expected:` + annotation: `:pass` (default), `:compile_error`, or `:in_dev` + (reserved, not run — for unlanded features like LEND or + capability-wrapping renderer). +4. Smoke: `ruby tools/fuzz/run.rb --templates --count 5 --seed 1`. +5. Validate it would have caught a real bug: revert the relevant fix + in the working tree, re-run, expect failures matching the bug + shape, then `git checkout` to restore. (This was done for commit + `9fa21926` and the `nested_loop_escape` template.) +6. If a real bug surfaces, the failing `.cht` is reproducible by + filename. Move it to `transpile-tests/` with a descriptive name — + it becomes a permanent regression test — and fix the bug. + +See `tools/fuzz/README.md` for the harness mechanics. + +## TODO summary + +| # | Gap | Risk | Effort | Priority | +|---|---|---|---|---| +| 1 | **Access-gate combinatoric set** (WITH × alias × permission × escape) | Medium | Medium | High | +| 2 | **Execution-boundary combinatoric set** (DO/BG/CONCURRENT × @parallel × ownership × move) | High | Medium | High | +| 3 | P3.3/P3.4/P3.5 lock-safety nesting | Low | Low | High | +| 4 | Error-path × allocator identity (INV-9) | Medium | Medium | High | +| 5 | Polymorphic-sync e2e (complementary) | Medium | Low | Medium | +| 6 | Cross-module ownership | Medium | Low | Medium | +| 7 | EscapeAnalysis phase-composition (unit-level fuzzer) | High | High | Medium | +| 8 | Frame-arena overflow stress | Medium | Low | Medium | +| 9 | MATCH TAKES variant payload | Low | Low | Medium | +| 10 | LEND poisoning negative tests | Low | Medium | Blocked on feature | +| 11 | MIRChecker invariant completeness (proof / Lean) | Medium | High | Low | +| 12 | Sorbet nil-kill | Medium | Medium | In progress | +| 13 | Stack-size combinatorics | Low | Medium | Low | diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md new file mode 100644 index 000000000..04abc65b9 --- /dev/null +++ b/tools/fuzz/README.md @@ -0,0 +1,157 @@ +# tools/fuzz — Combinatorial Fuzz Harness + +Template-based program generator that stresses MIR ownership invariants and +escape-analysis cross-products. Runs `.cht` programs through the existing +`./clear test` pipeline, which catches MIR violations (statically) and leaks / +UAF / double-free (at runtime via `std.testing.allocator`). + +## Usage + + # Generate + run the full matrix (every parameter combination) + ruby tools/fuzz/run.rb --matrix + + # Sample N tuples from the matrix with a fixed seed + ruby tools/fuzz/run.rb --count 50 --seed 42 + + # Generate only — useful for inspecting outputs before running them + ruby tools/fuzz/run.rb --matrix --generate-only + + # Custom output dir + clean previous run + ruby tools/fuzz/run.rb --matrix --out /tmp/fuzz --clean + +Exit code is 0 only if every program parses, type-checks, transpiles, runs, +and reports zero leaks. + +## Layout + + tools/fuzz/ + run.rb # driver + generator.rb # template registry + tuple iteration + templates/*.rb # one file per template + transpile-tests/fuzz/ + fuzz__.cht # generated programs (gitignored) + +Each template registers itself with `FuzzGenerator.register(name, cells:) { |params| ... }`, +declaring its parameter cells (the matrix it owns) and a renderer that turns a +cell into a complete .cht source string with embedded `ASSERT` oracles. + +## Current templates + +| Template | Active cells | Stresses | +|-----------------------------|--------------|----------| +| `escape_via_return` | 18 | E2 :always_returned, :heap_ptr_return | +| `loop_carry_collection` | 8 | E2 :loop_carry_string + frame-rewind invariant | +| `mutable_collection_param` | 8 | E2 :mutable_list_param_escape, INV-CROSS-FRAME-PARAM-ALLOC | +| `nested_loop_escape` | 8 | Loop-local list/map escape -> outer container (commit 9fa21926) | +| `stream_into_boundary` | 48 (+18 in_dev) | NEXT value passed across BG / DO / BG STREAM boundary, all sync wrappers | +| `lifetimed_return` | 6 (+12 in_dev) | BG handle escape rejection — exercises bg_lifetime_sources stamping | + +### `stream_into_boundary` matrix + +Combinatoric set for "STREAM nexts passed in DO / BG / BG STREAM blocks". +Per-cell parameters: + +- `consumer` ∈ {bg, do, bg_stream} +- `ownership` ∈ {local, shared} (per spec — @multiowned/@indirect cannot cross) +- `sync` ∈ {none, locked, write_locked, atomic, versioned} +- `move` ∈ {borrow, copy, give, clone, lend} (CLONE only for @shared/@split) +- `value` ∈ {int, string, struct} (struct used for non-atomic @shared cells) + +**Phase A** (12 active): `@local` × {borrow, copy} × {int, string} × 3 consumers. DO+@local +cells expected `:compile_error` (DO branches don't capture outer @local locals). + +**Phase B** (36 active, was 90 :in_dev): `@shared` with each of 4 sync wrappers × +{borrow, copy, clone} × 3 consumers. Per-sync value: `@atomic` uses Int64 (bare +Atomic, no Arc); `@locked` / `@writeLocked` / `@versioned` use a Counter struct. +Access dispatch: WITH EXCLUSIVE for locked/writeLocked, WITH SNAPSHOT for versioned, +direct read for atomic primitives. + +Findings encoded as `:compile_error` (matrix runs cleanly today): +- DO + @shared (any move): DO branches don't capture outer @shared bindings. +- CLONE + (atomic | locked | writeLocked | versioned): "CLONE only supported on + @split streams, @shared promises, owned shared handles". Bare Atomic primitives + and sync-wrapped structs aren't recognized as Arc'd by CLONE. + +Outstanding `:pass` failures (real findings the matrix surfaces): +- (BG | BG STREAM) + atomic + (borrow | copy) + Int64: BG body capture yields + `*AtomicInt(i64)` instead of auto-loading. Workaround in test corpus (test 339) + is to call a helper fn with `REQUIRES c: ATOMIC` rather than read directly. +- (BG, versioned, copy, struct): single edge case currently MIR-fails. + +**Phase C** (18 :in_dev): LEND keyword (TODO.md:41 — not yet parsed). The +LEND escape-poisoning rules will become negative-test cells (`expected: +:compile_error`) when LEND lands. + +### `lifetimed_return` matrix + +Verifies `bg_lifetime_sources` stamping (`src/annotator.rb:6449`) translates +into ENFORCEMENT — i.e., a BG handle that captures a lifetime-bound source +(@local / @shared:atomic-primitive / @multiowned / @locked) is rejected on +escape attempts. + +Cell parameters: `consumer` ∈ {bg, bg_stream} × `ownership` ∈ {local, +atomic_int, locked} × `escape` ∈ {await_in_scope, return_handle, +store_in_field}. + +Currently active: `:local` only (6 cells). The two negative cells for +`(BG, @local, return_handle)` and `(BG, @local, store_in_field)` are +`UNEXPECTED-PASS` against the current tree — the compiler accepts them and +runtime crashes with SIGABRT. **This is a real UAF surface that the +matrix immediately surfaced.** `bg_stream` correctly rejects both patterns. + +In_dev (12): `:atomic_int` and `:locked` baselines fail to compile because +BG capture of `@shared:atomic` / `@locked` doesn't auto-unwrap inside the +BG body — same root cause as the `stream_into_boundary` outstanding +failures. Flip these to `:pass` once the unwrap path lands. + +### Cell expectations + +Each cell carries an `expected:` annotation: + +- `:pass` (default) — must compile, run, and not leak. +- `:compile_error` — must fail compilation (CLEAR-level or Zig-level codegen). Used for + documented capability boundaries (e.g., `(DO + @local)` — DO branches lower to inner + Zig fns that don't close over enclosing locals; DO is meant for @shared state). +- `:in_dev` — emitted as a comment, NOT run. Reserves matrix space for unlanded features + (LEND, the @shared sync phases). The matrix count stays stable as features land — flip + the cell expectation, no schema churn. + +The runner reports `UNEXPECTED-PASS` when a `:compile_error` cell compiles successfully — +that's the signal a feature has landed and the cell should be flipped to `:pass`. + +Adding a new template = drop a new file under `templates/`. The generator +auto-loads everything in that directory at startup. + +## When a fuzz run finds a bug + +1. The failing `.cht` is named by content hash, so it's reproducible across + runs given the same seed and template. +2. Move the failing file from `transpile-tests/fuzz/` into `transpile-tests/` + with a descriptive name (e.g. `382_loop_local_map_escape.cht`). It becomes + a permanent regression test caught by `./clear test transpile-tests/`. +3. Fix the underlying bug in `src/`. +4. Re-run the fuzz suite. The newly added permanent test runs as part of the + main suite; the fuzz suite confirms no other shapes regressed. + +## Verification + +The system was validated by reverting commit `9fa21926` (loop frame promotion +fix for lists/maps/arrays) in the working tree: `nested_loop_escape` immediately +reported 8/8 failures with `[FRAME_NO_REWIND]` MIR errors. With the fix in +place, the full matrix passes 36/36. + +## Design notes + +- **Template-based, not grammar-based.** Random AST generation produces 90% + trivial syntax that doesn't reach MIR. Templates target the bug shapes that + actually slip through hand-written tests. +- **Per-file `clear test` invocation.** Each generated `.cht` is run through + `./clear test ` which uses `gen.rb --single`. Slower than batching but + trivial to integrate; switch to a bundled runner if matrix size grows past + ~200 programs. +- **Static + dynamic oracles.** The MIR checker (9 invariants) catches most + bugs before codegen; `std.testing.allocator` catches anything that survives + to runtime as a leak. +- **No formal verification.** The MIR checker already encodes a quasi-formal + proof of 9 invariants. Going further would cost months for marginal gain + over randomized stress testing of those same invariants. diff --git a/tools/fuzz/generator.rb b/tools/fuzz/generator.rb new file mode 100644 index 000000000..8e60e6455 --- /dev/null +++ b/tools/fuzz/generator.rb @@ -0,0 +1,50 @@ +# Combinatorial fuzz-program generator for the CLEAR transpiler. +# +# Templates live under tools/fuzz/templates/. Each template registers itself with +# FuzzGenerator.register, declaring its parameter cells (the matrix it owns) and +# a renderer that turns a cell into a complete .cht source string. + +class FuzzGenerator + TEMPLATES = {} + + Template = Struct.new(:name, :cells, :renderer, keyword_init: true) + + def self.register(name, cells:, &renderer) + raise "duplicate template #{name}" if TEMPLATES.key?(name) + TEMPLATES[name] = Template.new(name: name, cells: cells, renderer: renderer) + end + + def initialize(seed:) + @rng = Random.new(seed) + load_templates! + end + + def load_templates! + Dir[File.expand_path('templates/*.rb', __dir__)].sort.each { |f| require f } + raise "no templates loaded" if TEMPLATES.empty? + end + + def full_matrix + TEMPLATES.flat_map do |name, t| + t.cells.map { |params| { template: name, params: params } } + end + end + + def sample(n) + matrix = full_matrix + return matrix if n >= matrix.size + matrix.sample(n, random: @rng) + end + + # Returns a hash: { source: , expected: :pass | :compile_error | :in_dev }. + # :in_dev cells are skipped by the runner — they reserve space in the matrix + # for features not yet landed (e.g., LEND) so the matrix counts stay stable. + def emit(tuple) + t = TEMPLATES.fetch(tuple[:template]) + cell = tuple[:params].dup + expected = cell.delete(:expected) || :pass + src = t.renderer.call(cell) + header = "# AUTOGENERATED by tools/fuzz — template=#{tuple[:template]} expected=#{expected} params=#{cell.inspect}\n" + { source: header + src, expected: expected } + end +end diff --git a/tools/fuzz/run.rb b/tools/fuzz/run.rb new file mode 100755 index 000000000..2dd0bf288 --- /dev/null +++ b/tools/fuzz/run.rb @@ -0,0 +1,156 @@ +#!/usr/bin/env ruby +# Driver for the combinatorial fuzz harness. +# +# Modes: +# ruby tools/fuzz/run.rb --count 20 --seed 42 # sample, per-file run (default) +# ruby tools/fuzz/run.rb --matrix # full matrix +# ruby tools/fuzz/run.rb --generate-only # generate only, don't run +# ruby tools/fuzz/run.rb --templates t1,t2 # restrict to named templates +# +# Cells may be marked :in_dev to reserve matrix space for unlanded features +# (e.g., LEND). They are emitted as comments and not run. + +require 'optparse' +require 'fileutils' +require 'digest' +require_relative 'generator' + +LITEDB_ROOT = File.expand_path('../..', __dir__) + +opts = { + count: 20, + seed: nil, + out: File.expand_path('../../transpile-tests/fuzz', __dir__), + mode: :sample, # :sample | :matrix + generate_only: false, + clean: false, + templates: nil, +} + +OptionParser.new do |o| + o.banner = "Usage: ruby tools/fuzz/run.rb [options]" + o.on('--count N', Integer) { |v| opts[:count] = v } + o.on('--seed S', Integer) { |v| opts[:seed] = v } + o.on('--out DIR') { |v| opts[:out] = File.expand_path(v) } + o.on('--matrix') { opts[:mode] = :matrix } + o.on('--generate-only') { opts[:generate_only] = true } + o.on('--clean') { opts[:clean] = true } + o.on('--templates LIST') { |v| opts[:templates] = v.split(',').map(&:to_sym) } + o.on('-h', '--help') { puts o; exit 0 } +end.parse! + +opts[:seed] ||= Random.new_seed + +if opts[:clean] && Dir.exist?(opts[:out]) + Dir.glob(File.join(opts[:out], 'fuzz_*.cht')).each { |f| File.delete(f) } +end + +FileUtils.mkdir_p(opts[:out]) + +gen = FuzzGenerator.new(seed: opts[:seed]) +tuples = opts[:mode] == :matrix ? gen.full_matrix : gen.sample(opts[:count]) +if opts[:templates] + tuples = tuples.select { |t| opts[:templates].include?(t[:template]) } +end + +emitted = [] # array of { path:, expected: } +in_dev_count = 0 +tuples.each do |tuple| + result = gen.emit(tuple) + if result[:expected] == :in_dev + in_dev_count += 1 + next + end + hash = Digest::SHA1.hexdigest(result[:source])[0, 10] + name = "fuzz_#{tuple[:template]}_#{hash}.cht" + path = File.join(opts[:out], name) + File.write(path, result[:source]) + emitted << { path: path, expected: result[:expected] } +end + +puts "[fuzz] emitted #{emitted.size} programs to #{opts[:out]} (seed=#{opts[:seed]}, mode=#{opts[:mode]}, in_dev=#{in_dev_count})" + +if opts[:generate_only] + exit 0 +end + +# ── Runner ────────────────────────────────────────────────────────────── +# Bundled runner: build one zig/all-fuzz.zig, run zig test once. Mirrors +# transpile-tests/gen.rb's bulk pattern. Per-file mode invokes ./clear test +# per program — slower but isolates failures cleanly. + +def per_file_run(emitted) + clear = File.expand_path('../../clear', __dir__) + pass, fails, leaks, mir_errors, unexpected_pass = [], [], [], [], [] + + emitted.each_with_index do |entry, i| + path, expected = entry[:path], entry[:expected] + short = File.basename(path) + print "[#{i + 1}/#{emitted.size}] #{short} (#{expected})... " + out = `#{clear} test #{path} 2>&1` + status = $?.exitstatus + + compile_error = out.include?('MIR ownership verification failed') || + out.include?('[Compiler Error]') || + out.include?('Transpilation failed') || + out =~ /\.zig:\d+:\d+: error:/ + # Both flavors: directory mode reports "MEMORY LEAKS: N"; single-file mode + # reports per-address "[DebugAllocator] (err): ... leaked" then a summary + # "N tests leaked memory". + leak = out =~ /MEMORY LEAKS:\s*[1-9]/ || + out.include?('[DebugAllocator] (err)') || + out =~ /\d+ tests leaked memory/ + runtime_fail = (status != 0 && !compile_error) + + case expected + when :pass + if compile_error + puts "MIR-FAIL" + mir_errors << [path, out] + elsif leak + puts "LEAK" + leaks << [path, out] + elsif runtime_fail + puts "FAIL (exit #{status})" + fails << [path, out] + else + puts "ok" + pass << path + end + when :compile_error + if compile_error + puts "ok (rejected)" + pass << path + else + puts "UNEXPECTED-PASS" + unexpected_pass << [path, out] + end + end + end + + [pass, fails, leaks, mir_errors, unexpected_pass] +end + +pass, fails, leaks, mir_errors, unexpected_pass = per_file_run(emitted) + +puts +puts "=" * 60 +puts "Summary: #{emitted.size} run, #{pass.size} ok, #{fails.size} fail, #{leaks.size} leak, #{mir_errors.size} mir-error, #{unexpected_pass.size} unexpected-pass" +puts "=" * 60 + +[ + ["FAILURES", fails], + ["LEAKS", leaks], + ["MIR ERRORS", mir_errors], + ["UNEXPECTED PASS", unexpected_pass], +].each do |label, list| + next if list.empty? + puts + puts "#{label}:" + list.each do |path, out| + puts " - #{path}" + out.each_line.first(10).each { |l| puts " #{l}" } + end +end + +exit (fails.empty? && leaks.empty? && mir_errors.empty? && unexpected_pass.empty?) ? 0 : 1 diff --git a/tools/fuzz/templates/escape_via_return.rb b/tools/fuzz/templates/escape_via_return.rb new file mode 100644 index 000000000..b09a7965e --- /dev/null +++ b/tools/fuzz/templates/escape_via_return.rb @@ -0,0 +1,68 @@ +# Template: collection escapes via RETURN. +# Stresses E2 :always_returned + :heap_ptr_return. +# +# Pattern: build a collection inside a function, return it. The fix path is +# `promoteList`/heap-promotion at the boundary; if the compiler skips it, the +# caller holds a dangling pointer to a frame buffer. + +ESCAPE_VIA_RETURN_CELLS = [] + +[:int, :string].each do |elem| + [:none, :loop, :early_if].each do |body| + [3, 7].each do |size| + ESCAPE_VIA_RETURN_CELLS << { elem: elem, body: body, size: size } + end + end +end + +FuzzGenerator.register(:escape_via_return, cells: ESCAPE_VIA_RETURN_CELLS) do |p| + zig_type = (p[:elem] == :int) ? "Int64" : "String" + type_decl = "#{zig_type}[]@list" + + values = (1..p[:size]).map do |i| + p[:elem] == :int ? "#{i}_i64" : %("v#{i}") + end + + body = case p[:body] + when :none + values.map { |v| " lst.append(#{v});" }.join("\n") + when :loop + if p[:elem] == :int + " FOR i IN (1_i64 ..= #{p[:size]}_i64) DO\n lst.append(i);\n END" + else + values.map { |v| " lst.append(#{v});" }.join("\n") + end + when :early_if + half = (p[:size] / 2).clamp(1, p[:size]) + front = values.first(half).map { |v| " lst.append(#{v});" }.join("\n") + rest = values.drop(half).map { |v| " lst.append(#{v});" }.join("\n") + "#{front}\n IF #{half}_i64 < 0_i64 THEN\n RETURN lst;\n END\n#{rest}" + end + + expected_len = if p[:body] == :loop && p[:elem] == :int + p[:size] + else + p[:size] + end + + first_check = if p[:elem] == :int + "ASSERT result[0] == 1_i64, \"first element\";" + else + 'ASSERT eql?(result[0], "v1"), "first element";' + end + + <<~CHT + FN make() RETURNS !#{type_decl} -> + MUTABLE lst: #{type_decl} = []; + #{body} + RETURN lst; + END + + FN main() RETURNS Void -> + result = make(); + ASSERT length(result) == #{expected_len}_i64, "returned list length"; + #{first_check} + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/lifetimed_return.rb b/tools/fuzz/templates/lifetimed_return.rb new file mode 100644 index 000000000..3c2d86a6f --- /dev/null +++ b/tools/fuzz/templates/lifetimed_return.rb @@ -0,0 +1,155 @@ +# Template: BG / BG STREAM handle that captures a lifetime-bound source +# must be rejected on escape. +# +# Source mechanism: src/annotator.rb:6449 `bg_lifetime_sources` stamps a +# lifetime on the BG handle's symbol when it captures a binding whose +# sync ∈ {:atomic, :locked, :write_locked, :local} or storage == :multiowned. +# Plain @shared (Arc, no sync) is NOT lifetime-bound — refcount handles it. +# +# This matrix verifies STAMPING translates into ENFORCEMENT. A by-hand +# probe (RETURN BG{capture}, with `c @local`) compiled cleanly and then +# crashed at runtime with SIGABRT — so the gap is real. +# +# Cell shape: +# { consumer:, ownership:, escape:, expected: } +# +# - consumer ∈ {:bg, :bg_stream} +# - ownership ∈ {:local, :atomic_int, :locked} — three lifetime-bound shapes +# covering: raw *T (@local), bare Atomic primitive (@shared:atomic Int64), +# Arc(Locked(struct)) (@locked struct). +# - escape ∈ {:await_in_scope, :return_handle, :store_in_field} +# - :await_in_scope is the positive baseline (canonical safe pattern) +# - :return_handle is the negative case ⇒ expected :compile_error +# - :store_in_field is the negative case (heap-struct field stores +# a captured handle) ⇒ expected :compile_error + +LIFETIMED_RETURN_CELLS = [] + +# v1 active: :local ownership only — exercises the @local lifetime-stamp +# path in src/annotator.rb:6457 and immediately surfaced two real bugs +# (return_handle and store_in_field both UNEXPECTED-PASS at compile, +# crash with SIGABRT at runtime). +# +# v1 in_dev: :atomic_int and :locked. The await_in_scope baseline for +# both currently fails compilation because BG capture of @shared:atomic +# / @locked does not auto-unwrap inside the BG body (you get a *AtomicInt +# / Arc(Locked) pointer where an Int64 is expected). That's a separate +# bug class from lifetime enforcement; folding it in here would conflate +# findings. Flip these to :pass once the BG-body unwrap path lands. +[:bg, :bg_stream].each do |consumer| + [:local, :atomic_int, :locked].each do |ownership| + [:await_in_scope, :return_handle, :store_in_field].each do |escape| + cell = { consumer: consumer, ownership: ownership, escape: escape } + cell[:expected] = (escape == :await_in_scope) ? :pass : :compile_error + cell[:expected] = :in_dev if ownership != :local + LIFETIMED_RETURN_CELLS << cell + end + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +# Declares the captured value with the given ownership inside a function +# body. Returns: [decl_lines, value_expr_inside_bg_body]. +# +# - :local → `MUTABLE c = Counter{} @local;` ; capture reads `c.value` +# - :atomic_int → `MUTABLE c: Int64 = 0_i64 @shared:atomic;` ; capture reads `c` +# - :locked → `c = Counter{ value: 0 } @locked;` ; capture reads +# via `WITH EXCLUSIVE c AS x { x.value }` +def lifetime_value_setup(ownership) + case ownership + when :local + decl = " MUTABLE c = Counter{ value: 0_i64 } @local;" + use = "c.value" + when :atomic_int + decl = " MUTABLE c: Int64 = 0_i64 @shared:atomic;" + use = "c" + when :locked + decl = " c = Counter{ value: 0_i64 } @locked;" + use = "WITH EXCLUSIVE c AS x { x.value }" + end + [decl, use] +end + +FuzzGenerator.register(:lifetimed_return, cells: LIFETIMED_RETURN_CELLS) do |p| + decl, use = lifetime_value_setup(p[:ownership]) + + # Yield-shape inside the consumer body — :bg returns the value once; + # :bg_stream YIELDs the value forever and main reads two iterations. + bg_body = case p[:consumer] + when :bg then "#{use}; " + when :bg_stream then "WHILE TRUE DO YIELD #{use}; END" + end + + bg_decl_type = case p[:consumer] + when :bg then "~Int64" + when :bg_stream then "~Int64[INF]" + end + + bg_lit = "BG#{p[:consumer] == :bg_stream ? ' STREAM' : ''} { #{bg_body} }" + + case p[:escape] + when :await_in_scope + # Positive baseline — declare, capture, await IN SAME SCOPE. Should pass. + consume_block = case p[:consumer] + when :bg + <<~CHT.chomp + bg: #{bg_decl_type} = #{bg_lit}; + r: Int64 = NEXT bg; + ASSERT r == 0_i64, "await produced a value"; + CHT + when :bg_stream + <<~CHT.chomp + bg: #{bg_decl_type} = #{bg_lit}; + a: Int64 = NEXT bg; + b: Int64 = NEXT bg; + ASSERT a == 0_i64, "stream first"; + ASSERT b == 0_i64, "stream second"; + CHT + end + + <<~CHT + STRUCT Counter { value: Int64 } + + FN main() RETURNS Void -> + #{decl} + #{consume_block} + RETURN; + END + CHT + + when :return_handle + # Negative case — helper declares the source, returns a BG that captures + # it. Source dies before BG is awaited. Today's compiler accepts; runtime + # crashes (SIGABRT in scheduler). Should be rejected at compile. + <<~CHT + STRUCT Counter { value: Int64 } + + FN spawn() RETURNS #{bg_decl_type} -> + #{decl} + RETURN #{bg_lit}; + END + + FN main() RETURNS Void -> + bg = spawn(); + #{p[:consumer] == :bg ? 'r: Int64 = NEXT bg;' : 'a: Int64 = NEXT bg;'} + RETURN; + END + CHT + + when :store_in_field + # Negative case — heap-allocated holder stores a BG handle that + # captures the source. Holder outlives source ⇒ UAF on later NEXT. + <<~CHT + STRUCT Counter { value: Int64 } + STRUCT Holder { bg: #{bg_decl_type} } + + FN main() RETURNS Void -> + #{decl} + MUTABLE h: Holder = Holder{ bg: #{bg_lit} }; + #{p[:consumer] == :bg ? 'r: Int64 = NEXT h.bg;' : 'a: Int64 = NEXT h.bg;'} + RETURN; + END + CHT + end +end diff --git a/tools/fuzz/templates/loop_carry_collection.rb b/tools/fuzz/templates/loop_carry_collection.rb new file mode 100644 index 000000000..c148c5e08 --- /dev/null +++ b/tools/fuzz/templates/loop_carry_collection.rb @@ -0,0 +1,59 @@ +# Template: collection built inside a loop and used after the loop. +# Stresses E2 :loop_carry_string and the loop-escape promotion path +# (recent commits 9fa21926, d80e6539, 1599bfb1). +# +# Pattern: declare a list, push to it inside FOR, then read length/contents +# after the loop. Loop-local mark/rewind must NOT free the list's backing +# buffer (it lives in the enclosing frame, not the loop's per-iter frame). + +LOOP_CARRY_CELLS = [] + +[:int, :string].each do |elem| + [1, 2].each do |depth| + [5, 12].each do |outer| + LOOP_CARRY_CELLS << { elem: elem, depth: depth, outer: outer } + end + end +end + +FuzzGenerator.register(:loop_carry_collection, cells: LOOP_CARRY_CELLS) do |p| + zig_type = (p[:elem] == :int) ? "Int64" : "String" + type_decl = "#{zig_type}[]@list" + + push_expr = (p[:elem] == :int) ? "i" : "i.toString()" + + inner = case p[:depth] + when 1 + " FOR i IN (1_i64 ..= #{p[:outer]}_i64) DO\n lst.append(#{push_expr});\n END" + when 2 + inner_count = 3 + if p[:elem] == :int + <<~BODY.chomp + FOR i IN (1_i64 ..= #{p[:outer]}_i64) DO + FOR j IN (1_i64 ..= #{inner_count}_i64) DO + lst.append(i + j); + END + END + BODY + else + <<~BODY.chomp + FOR i IN (1_i64 ..= #{p[:outer]}_i64) DO + FOR j IN (1_i64 ..= #{inner_count}_i64) DO + lst.append(j.toString()); + END + END + BODY + end + end + + expected_len = (p[:depth] == 1) ? p[:outer] : p[:outer] * 3 + + <<~CHT + FN main() RETURNS Void -> + MUTABLE lst: #{type_decl} = []; + #{inner} + ASSERT length(lst) == #{expected_len}_i64, "list length after loop"; + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/mutable_collection_param.rb b/tools/fuzz/templates/mutable_collection_param.rb new file mode 100644 index 000000000..0f0357605 --- /dev/null +++ b/tools/fuzz/templates/mutable_collection_param.rb @@ -0,0 +1,52 @@ +# Template: MUTABLE collection passed as a parameter and mutated by the callee. +# Stresses E2 :mutable_list_param_escape and INV-CROSS-FRAME-PARAM-ALLOC. +# +# Pattern: a function takes MUTABLE xs: T[]@list, appends to it, returns. The +# caller's list crosses a frame boundary as a pointer; if the caller frame- +# allocated it, the buffer relocates on grow and the callee sees stale state +# (or the post-call read sees a freed buffer). + +MUTABLE_PARAM_CELLS = [] + +[:int, :string].each do |elem| + [:none, :outer_loop].each do |context| + [1, 4].each do |calls| + MUTABLE_PARAM_CELLS << { elem: elem, context: context, calls: calls } + end + end +end + +FuzzGenerator.register(:mutable_collection_param, cells: MUTABLE_PARAM_CELLS) do |p| + zig_type = (p[:elem] == :int) ? "Int64" : "String" + type_decl = "#{zig_type}[]@list" + + push_value = (p[:elem] == :int) ? "99_i64" : '"hello"' + + # `xs.append` is fallible (OOM) so callee must declare !Void. + callee = <<~CHT.chomp + FN add!(MUTABLE xs: #{type_decl}) RETURNS !Void -> + xs.append(#{push_value}); + RETURN; + END + CHT + + call_block = case p[:context] + when :none + (1..p[:calls]).map { " add!(lst);" }.join("\n") + when :outer_loop + " FOR i IN (1_i64 ..= #{p[:calls]}_i64) DO\n add!(lst);\n END" + end + + expected_len = p[:calls] + + <<~CHT + #{callee} + + FN main() RETURNS Void -> + MUTABLE lst: #{type_decl} = []; + #{call_block} + ASSERT length(lst) == #{expected_len}_i64, "list length after mutating calls"; + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/nested_loop_escape.rb b/tools/fuzz/templates/nested_loop_escape.rb new file mode 100644 index 000000000..d8bf5f225 --- /dev/null +++ b/tools/fuzz/templates/nested_loop_escape.rb @@ -0,0 +1,69 @@ +# Template: a loop-LOCAL collection escapes into an outer collection. +# Stresses the loop-frame promotion path (commit 9fa21926: "cover escaping +# frame collections in loops"). Pre-fix, only loop-local Strings were +# promoted to heap on escape; lists/maps/arrays leaked or UAF'd. +# +# Pattern: +# MUTABLE outer: Int64[][]@list = []; +# FOR ... DO +# MUTABLE inner: Int64[]@list = []; # loop-local, frame +# inner.append(...); +# outer.append(inner); # escape -> must heap-promote +# END + +NESTED_LOOP_ESCAPE_CELLS = [] + +[:list, :array].each do |inner_kind| + [:while, :for].each do |loop_kind| + [1, 3].each do |outer_iters| + NESTED_LOOP_ESCAPE_CELLS << { inner_kind: inner_kind, loop_kind: loop_kind, iters: outer_iters } + end + end +end + +FuzzGenerator.register(:nested_loop_escape, cells: NESTED_LOOP_ESCAPE_CELLS) do |p| + outer_decl = "MUTABLE outer: Int64[][]@list = [];" + + inner_block = case p[:inner_kind] + when :list + <<~BODY.chomp + MUTABLE inner: Int64[]@list = []; + inner.append(i); + inner.append(i + 1_i64); + outer.append(inner); + BODY + when :array + <<~BODY.chomp + inner: Int64[] = [i, i + 1_i64]; + outer.append(inner); + BODY + end + + loop_block = case p[:loop_kind] + when :while + <<~LOOP.chomp + MUTABLE i: Int64 = 0_i64; + WHILE i < #{p[:iters]}_i64 DO + #{inner_block} + i = i + 1_i64; + END + LOOP + when :for + <<~LOOP.chomp + FOR i IN (0_i64 ..< #{p[:iters]}_i64) DO + #{inner_block} + END + LOOP + end + + <<~CHT + FN main() RETURNS Void -> + #{outer_decl} + #{loop_block} + ASSERT length(outer) == #{p[:iters]}_i64, "outer list length"; + ASSERT length(outer[0_i64]) == 2_i64, "first inner length"; + ASSERT outer[0_i64][0_i64] == 0_i64, "first inner first element"; + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/stream_into_boundary.rb b/tools/fuzz/templates/stream_into_boundary.rb new file mode 100644 index 000000000..159d6af2f --- /dev/null +++ b/tools/fuzz/templates/stream_into_boundary.rb @@ -0,0 +1,258 @@ +# Template: STREAM next passed across an execution boundary. +# Stresses the depth-1 nesting: +# +# src: ~T[INF] = BG STREAM { ... YIELD ... } +# val = NEXT src (optionally wrapped with @shared:sync) +# { ... uses val with ... } +# +# Where consumer ∈ {BG, DO, BG STREAM}, ownership ∈ {@local, @shared+sync}, +# move ∈ {borrow, copy, give, clone, lend}, value ∈ {int, string, struct}. +# +# Constraints (per spec): +# - @local | @shared can cross boundaries; @multiowned | @indirect cannot. +# - CLONE requires @shared or @split. +# - LEND poisons the boundary with the borrow's lifetime — not yet +# implemented (TODO.md:41), so LEND cells are tagged :in_dev. +# - Sync wrappers @locked / @writeLocked / @atomic / @versioned apply to +# @shared values. @local has no sync. +# - @atomic uses bare Atomic on primitives (no Arc wrap, per type.rb: +# "Atomics M2.2: drop the Arc / Rc wrap for @shared:atomic"). Other +# sync wrappers wrap a struct (Counter) for non-trivial access. + +STREAM_BOUNDARY_CELLS = [] + +CONSUMERS = [:bg, :do, :bg_stream] +VALUES_PHASE_A = [:int, :string] +LOCAL_MOVES = [:borrow, :copy] +SHARED_MOVES = [:borrow, :copy, :clone] +SYNCS = [:locked, :write_locked, :atomic, :versioned] +LEND_MOVES = [:lend] + +# Phase B forces a value type per sync (atomics need a primitive; locked/ +# writeLocked/versioned wrap a struct so WITH EXCLUSIVE / WITH SNAPSHOT +# have a non-trivial field to read). +PHASE_B_VALUE_FOR_SYNC = { + atomic: :int, + locked: :struct, + write_locked: :struct, + versioned: :struct, +} + +# Phase A — @local (no sync). +# +# Findings encoded as expectations: +# - (DO + @local + :borrow + non-Copy): USE AFTER MOVE — both DO branches +# capture val and capture-of-non-Copy is a move. +# - (DO + @local + any other combo): Zig-level "val not accessible from +# inner function" — DO branches lower to inner Zig fns that don't close +# over enclosing locals. DO is meant for @shared+sync state. +COPY_VALUES = [:int] +CONSUMERS.each do |c| + VALUES_PHASE_A.each do |v| + LOCAL_MOVES.each do |m| + cell = { consumer: c, ownership: :local, sync: :none, move: m, value: v } + cell[:expected] = :compile_error if c == :do + STREAM_BOUNDARY_CELLS << cell + end + end +end + +# Phase B — @shared with each of 4 sync strategies. +# +# Findings (from running the matrix on the current tree): +# - DO + @shared currently fails like DO + @local: branches don't capture +# outer-scope @shared bindings via the implicit-borrow path. Existing +# test corpus uses DO with state declared INSIDE each branch. Marked +# :compile_error. +# - CLONE on a sync-wrapped value errors with "CLONE is only supported on +# @split streams, @shared promises, and owned shared handles, got +# 'Int64'/'Counter'". Atomic primitives are bare (no Arc), and CLONE +# on a `@locked` struct doesn't traverse through the capability to +# find the inner Arc. Marked :compile_error for now; revisit when +# CLONE learns to look through sync wrappers. +# - (non-DO) + :atomic + any move: BG body capture of `Int64 @shared:atomic` +# yields a `*AtomicInt(i64)` pointer instead of auto-loading. Test 339 +# works around this by passing the binding to a function with REQUIRES c: +# ATOMIC. Direct read inside BG body unsupported today. Left as :pass +# so the matrix continues to report MIR-FAIL until it's fixed — these +# 6 failing cells are the outstanding work. +# - (BG, :versioned, :copy, :struct) — single edge case currently fails. +# Left as :pass so it stays visible. +CONSUMERS.each do |c| + PHASE_B_VALUE_FOR_SYNC.each do |sync, value| + SHARED_MOVES.each do |m| + cell = { consumer: c, ownership: :shared, sync: sync, move: m, value: value } + cell[:expected] = :compile_error if c == :do # DO + @shared + cell[:expected] = :compile_error if m == :clone # CLONE constraint + STREAM_BOUNDARY_CELLS << cell + end + end +end + +# Phase C — LEND (in development; TODO.md:41 — keyword not yet parsed). +CONSUMERS.each do |c| + VALUES_PHASE_A.each do |v| + LEND_MOVES.each do |m| + STREAM_BOUNDARY_CELLS << { consumer: c, ownership: :local, sync: :none, move: m, value: v, expected: :in_dev } + end + end + PHASE_B_VALUE_FOR_SYNC.each do |sync, value| + LEND_MOVES.each do |m| + STREAM_BOUNDARY_CELLS << { consumer: c, ownership: :shared, sync: sync, move: m, value: value, expected: :in_dev } + end + end +end + +# ── renderers ───────────────────────────────────────────────────────── + +# The BG STREAM source always yields Int64; non-int value cells construct +# their values from the yielded Int64. Keeps the producer side uniform. + +def fuzz_value_type(v) + case v + when :int then "Int64" + when :string then "String" + when :struct then "Counter" + end +end + +# What the BG STREAM source YIELDs. For :int and :struct cells the source +# yields a primitive Int64; for :string it yields a String. The val_decl +# stage then wraps the NEXT result into the actual cell value type. +def fuzz_src_value_type(v) + v == :string ? "String" : "Int64" +end + +def fuzz_src_yield_expr(v) + v == :string ? "i.toString()" : "i" +end + +# Build the val declaration: takes the NEXT result (typed by src) and +# binds it as the cell's value with appropriate sync wrapping. +def fuzz_val_decl(p) + src_t = fuzz_src_value_type(p[:value]) + v_t = fuzz_value_type(p[:value]) + + if p[:ownership] == :local + return "raw: #{src_t} = NEXT src;\n val: #{v_t} = raw;" + end + + # @shared:sync construction + sync_word = case p[:sync] + when :locked then "locked" + when :write_locked then "writeLocked" + when :atomic then "atomic" + when :versioned then "versioned" + end + + case p[:value] + when :int # @shared:atomic Int64 — bare Atomic, no struct + "raw: Int64 = NEXT src;\n MUTABLE val: Int64 = raw @shared:#{sync_word};" + when :struct # Counter wrapper — locked / writeLocked / versioned + "raw: Int64 = NEXT src;\n val = Counter{ value: raw } @#{sync_word};" + end +end + +# Build the read of a captured `var` into an Int64 result. Returns a pair +# [setup_stmts, terminal_expr] — caller assembles them with the move-mode +# binding line. WITH is a statement form (tests 293, 278), so struct-sync +# cells need a local binding the WITH writes into, then the BG body's +# terminal is that local. +def fuzz_read_int_fragment(p, var) + case p[:value] + when :int then ["", var] + when :string then ["", "#{var}.length()"] + when :struct + op = case p[:sync] + when :locked, :write_locked then "EXCLUSIVE" + when :versioned then "SNAPSHOT" + end + setup = "MUTABLE r: Int64 = 0_i64; WITH #{op} #{var} AS x { r = x.value; }" + [setup, "r"] + end +end + +FuzzGenerator.register(:stream_into_boundary, cells: STREAM_BOUNDARY_CELLS) do |p| + src_t = fuzz_src_value_type(p[:value]) + src_yield = fuzz_src_yield_expr(p[:value]) + + # ── outer infinite BG STREAM source ──────────────────────────────── + src_decl = <<~CHT.chomp + src: ~#{src_t}[INF] = BG STREAM { + MUTABLE i: Int64 = 1_i64; + WHILE TRUE DO + YIELD #{src_yield}; + i = i + 1_i64; + END + }; + CHT + + val_decl = fuzz_val_decl(p) + + # Build a single consumer-branch fragment given a binding name `bv` and a + # terminal verb (empty for BG/DO branches; "YIELD " for BG STREAM body). + # Composes optional move-mode binding + struct-WITH setup + terminal. + build_branch = ->(bv, terminal) do + move_setup, source_var = + case p[:move] + when :borrow then ["", "val"] + when :copy then ["#{bv} = COPY val;", bv] + when :clone then ["#{bv} = CLONE val;", bv] + when :give then ["", "GIVE val"] + when :lend then ["", "LEND val"] + end + + read_setup, read_expr = fuzz_read_int_fragment(p, source_var) + parts = [move_setup, read_setup].reject(&:empty?) + "#{parts.join(' ')} #{terminal}#{read_expr};".strip + end + + consumer_block = case p[:consumer] + when :bg + inner = build_branch.call("c", "") + <<~CHT.chomp + result: ~Int64 = BG { #{inner} }; + answer: Int64 = NEXT result; + ASSERT answer >= 0_i64, "bg consumer produced a value"; + CHT + when :do + a = build_branch.call("c1", "") + b = build_branch.call("c2", "") + <<~CHT.chomp + DO { + BG { #{a} }, + BG { #{b} } + } + CHT + when :bg_stream + inner_body = build_branch.call("c", "YIELD ") + <<~CHT.chomp + inner: ~Int64[INF] = BG STREAM { + MUTABLE k: Int64 = 0_i64; + WHILE TRUE DO + #{inner_body} + k = k + 1_i64; + END + }; + a: Int64 = NEXT inner; + b: Int64 = NEXT inner; + ASSERT a >= 0_i64, "stream consumer first yield"; + ASSERT b >= 0_i64, "stream consumer second yield"; + CHT + end + + body = [ + " #{src_decl}", + " #{val_decl}", + " #{consumer_block}", + ].join("\n") + + preamble = (p[:value] == :struct) ? "STRUCT Counter { value: Int64 }\n\n" : "" + + <<~CHT + #{preamble}FN main() RETURNS Void -> + #{body} + RETURN; + END + CHT +end From 8ee0e7a9eea66a8af1fe7d57c3d786cac70ad35a Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 12:50:43 +0000 Subject: [PATCH 10/21] =?UTF-8?q?feat(fuzz):=20add=20access=5Fgate=20templ?= =?UTF-8?q?ate=20=E2=80=94=20WITH=20alias=20escape=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies CLAUDE.md's non-escaping rule on WITH aliases (EXCLUSIVE / BORROWED / RESTRICT / SNAPSHOT) against 10 patterns per alias-perm tuple: 2 baselines (use, RETURN COPY) + 8 escape attempts (return alias, return field, BG / DO / BG STREAM capture, TAKES consume, store in heap field, list append). 5 alias-perm tuples × 10 patterns = 50 cells. Findings on current tree: - 30/30 escape-attempt cells correctly reject with the right diagnostic ("Cannot RETURN 'ref' from inside a WITH block. WITH aliases are borrows of locked data and cannot escape their scope."). Solid. - 16/20 baseline cells pass. - 4 baseline_copy_return cells fail with Zig codegen error "expected error_union, found *T": RETURN COPY ref lowers to a *T pointer instead of a Counter value for EXCLUSIVE / RESTRICT / SNAPSHOT aliases. Only BORROWED's COPY-return path is correctly lowered. The unit test spec/with_alias_escape_spec.rb "allows RETURN COPY of an EXCLUSIVE alias" passes because it stops at annotation. The matrix running end-to-end through codegen surfaces the type mismatch — exactly the unit/e2e gap motivating the harness. Per branch policy: matrix surfaces the bugs; fixes land elsewhere. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/fuzz/README.md | 53 ++++++ tools/fuzz/templates/access_gate.rb | 268 ++++++++++++++++++++++++++++ 2 files changed, 321 insertions(+) create mode 100644 tools/fuzz/templates/access_gate.rb diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index 04abc65b9..a7aada541 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -45,6 +45,7 @@ cell into a complete .cht source string with embedded `ASSERT` oracles. | `nested_loop_escape` | 8 | Loop-local list/map escape -> outer container (commit 9fa21926) | | `stream_into_boundary` | 48 (+18 in_dev) | NEXT value passed across BG / DO / BG STREAM boundary, all sync wrappers | | `lifetimed_return` | 6 (+12 in_dev) | BG handle escape rejection — exercises bg_lifetime_sources stamping | +| `access_gate` | 50 | WITH-alias escape rules — 5 alias-perm tuples × 10 patterns | ### `stream_into_boundary` matrix @@ -82,6 +83,58 @@ Outstanding `:pass` failures (real findings the matrix surfaces): LEND escape-poisoning rules will become negative-test cells (`expected: :compile_error`) when LEND lands. +### `access_gate` matrix + +Verifies CLAUDE.md's non-escaping rule: aliases bound by `WITH (EXCLUSIVE | +BORROWED | RESTRICT | SNAPSHOT)` cannot escape their block. The legal +exception is `RETURN COPY alias`. + +5 alias-perm tuples (alias kind forces the permission): + +| Alias | Permission | Notes | +|---|---|---| +| `EXCLUSIVE` | `@locked`, `@writeLocked` | mutable, exclusive | +| `BORROWED` | plain (no perm) | read-only borrow | +| `RESTRICT` | plain (no perm) | mutable borrow on a MUTABLE source | +| `SNAPSHOT` | `@versioned` | read-only snapshot | + +× 10 patterns per cell (2 baselines + 8 escape attempts) = **50 cells**. + +Patterns: + +- `baseline_use` — `WITH ... { x = ref.value }` (no escape) — `:pass` +- `baseline_copy_return` — `WITH ... { RETURN COPY ref }` — `:pass` +- `return_alias` — `RETURN ref` — must reject +- `return_field` — `RETURN ref.value` — must reject (CLAUDE.md: any + GetField/GetIndex chain rooted at non-escaping symbol) +- `bg_capture` — `RETURN BG { ref.value }` — must reject +- `do_capture` — `append(handles, BG { ref.value })` inside WITH — must reject +- `bg_stream_capture` — `RETURN BG STREAM { YIELD ref.value }` — must reject +- `takes_consume` — `consume!(GIVE ref)` — must reject +- `store_field` — `outer.field = ref` — must reject +- `list_append` — `list.append(ref)` — must reject + +**Findings on the current tree** (4 cells `:pass`-marked but currently fail): + +The escape-rule rejection is solid — all 40 negative cells properly +reject with the right diagnostic ("Cannot RETURN 'ref' from inside a WITH +block. WITH aliases are borrows..."). The bugs surfaced are in the LEGAL +path: + +- `(exclusive, locked, baseline_copy_return)` — Zig codegen error + "expected error_union, found *T". `RETURN COPY ref` lowers to a `*T` + pointer instead of a Counter value. +- `(exclusive, write_locked, baseline_copy_return)` — same +- `(restrict, plain, baseline_copy_return)` — same +- `(snapshot, versioned, baseline_copy_return)` — same +- `(borrowed, plain, baseline_copy_return)` — **passes** (the + one alias kind where COPY return is correctly lowered) + +The unit test `spec/with_alias_escape_spec.rb` "allows RETURN COPY of an +EXCLUSIVE alias" stops at annotation and never observes the codegen- +level type mismatch. The matrix end-to-end run does. This is exactly +the gap the harness was added for. + ### `lifetimed_return` matrix Verifies `bg_lifetime_sources` stamping (`src/annotator.rb:6449`) translates diff --git a/tools/fuzz/templates/access_gate.rb b/tools/fuzz/templates/access_gate.rb new file mode 100644 index 000000000..9339597a5 --- /dev/null +++ b/tools/fuzz/templates/access_gate.rb @@ -0,0 +1,268 @@ +# Template: WITH-alias escape rules. +# Verifies CLAUDE.md's non-escaping rule: aliases bound by WITH (EXCLUSIVE, +# BORROWED, RESTRICT, SNAPSHOT) cannot escape their block. The legal +# exception is `RETURN COPY alias`. +# +# Cross-references: +# - CLAUDE.md "Key rule: WITH ... AS alias aliases are non-escaping" +# - mir-bugs.md #3 (WITH RESTRICT reassignment UAF) +# - spec/with_alias_escape_spec.rb / spec/borrowed_escape_spec.rb (named +# gaps; the matrix exercises cross-products they don't cover) +# +# Cell schema: +# { alias:, perm:, escape:, expected: } +# +# alias ∈ {:exclusive, :borrowed, :restrict, :snapshot} +# perm forced by alias kind: +# :exclusive → :locked, :write_locked +# :borrowed → :plain +# :restrict → :plain +# :snapshot → :versioned +# escape ∈ 8 patterns (2 baselines + 6 escape attempts) +# expected = :pass for baselines, :compile_error for escape attempts. +# +# A cell that UNEXPECTED-PASSes on an escape attempt is a real escape-rule +# enforcement gap. A :pass cell that fails is either a syntax issue in the +# template or a baseline regression. + +ACCESS_GATE_CELLS = [] + +ALIAS_PERMS = [ + [:exclusive, :locked], + [:exclusive, :write_locked], + [:borrowed, :plain], + [:restrict, :plain], + [:snapshot, :versioned], +] + +ESCAPE_PATTERNS = [ + :baseline_use, # use alias inside WITH, no escape — should :pass + :baseline_copy_return, # RETURN COPY alias — legal exception, should :pass + :return_alias, # RETURN alias — must reject + :return_field, # RETURN alias.value — must reject + :bg_capture, # BG { use(alias) } returned — must reject (Gap 1) + :do_capture, # DO { BG { use(alias) }, ... } returned — must reject + :bg_stream_capture, # BG STREAM { use(alias) } returned — must reject + :takes_consume, # foo!(GIVE alias) — must reject (alias isn't owned) + :store_field, # outer.field = alias — must reject + :list_append, # append(some_list, alias) — must reject +] + +ALIAS_PERMS.each do |alias_kind, perm| + ESCAPE_PATTERNS.each do |escape| + cell = { alias: alias_kind, perm: perm, escape: escape } + cell[:expected] = (escape.to_s.start_with?('baseline_')) ? :pass : :compile_error + ACCESS_GATE_CELLS << cell + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +# Source declaration for the locked/snapshotted/plain Counter. +def access_gate_source_decl(perm) + # MUTABLE so RESTRICT (which requires a mutable source) is admissible. + # Other alias kinds tolerate MUTABLE on the source even if they don't need it. + case perm + when :locked then "MUTABLE c = Counter{ value: 1_i64 } @locked;" + when :write_locked then "MUTABLE c = Counter{ value: 1_i64 } @writeLocked;" + when :versioned then "MUTABLE c = Counter{ value: 1_i64 } @versioned;" + when :plain then "MUTABLE c = Counter{ value: 1_i64 };" + end +end + +# The WITH-clause head: which alias keyword + AS form. +def access_gate_with_head(alias_kind) + case alias_kind + when :exclusive then "WITH EXCLUSIVE c AS ref" + when :borrowed then "WITH BORROWED c AS ref" + when :restrict then "WITH RESTRICT c AS MUTABLE ref" + when :snapshot then "WITH SNAPSHOT c AS ref" + end +end + +FuzzGenerator.register(:access_gate, cells: ACCESS_GATE_CELLS) do |p| + decl = access_gate_source_decl(p[:perm]) + head = access_gate_with_head(p[:alias]) + + case p[:escape] + when :baseline_use + <<~CHT + STRUCT Counter { value: Int64 } + + FN main() RETURNS Void -> + #{decl} + #{head} { + x: Int64 = ref.value; + ASSERT x == 1_i64, "alias use baseline"; + } + RETURN; + END + CHT + + when :baseline_copy_return + <<~CHT + STRUCT Counter { value: Int64 } + + FN extract() RETURNS !Counter -> + #{decl} + #{head} { + RETURN COPY ref; + } + END + + FN main() RETURNS Void -> + c2 = extract(); + ASSERT c2.value == 1_i64, "RETURN COPY baseline"; + RETURN; + END + CHT + + when :return_alias + <<~CHT + STRUCT Counter { value: Int64 } + + FN leak() RETURNS !Counter -> + #{decl} + #{head} { + RETURN ref; + } + END + + FN main() RETURNS Void -> + c2 = leak(); + RETURN; + END + CHT + + when :return_field + # Alias is a borrow; alias.value is also a borrow (or a Copy primitive + # for Int64). For Int64 fields this might actually be legal — primitives + # are Copy. Keep the cell in the matrix to test that distinction: + # if RETURN ref.value passes for an Int64 field, that's correct (Copy + # types break the borrow). The escape rule should only apply to non-Copy + # field types. + # + # NOTE: marked :compile_error per the CLAUDE.md rule's literal text + # ("RETURN alias.field is rejected"); if it actually passes for Int64, + # that's a correct UNEXPECTED-PASS and the rule should be refined. + <<~CHT + STRUCT Counter { value: Int64 } + + FN leak() RETURNS !Int64 -> + #{decl} + #{head} { + RETURN ref.value; + } + END + + FN main() RETURNS Void -> + v = leak(); + RETURN; + END + CHT + + when :bg_capture + <<~CHT + STRUCT Counter { value: Int64 } + + FN leak() RETURNS ~Int64 -> + #{decl} + #{head} { + RETURN BG { ref.value; }; + } + END + + FN main() RETURNS Void -> + bg = leak(); + v: Int64 = NEXT bg; + RETURN; + END + CHT + + when :do_capture + # DO branches return Void (implicit join) — wrap in a function and + # try to leak by storing a result outside the WITH. Test the + # capture-from-WITH-scope rule, not the DO-return. + <<~CHT + STRUCT Counter { value: Int64 } + + FN main() RETURNS Void -> + #{decl} + MUTABLE handles: ~Int64[]@list = []; + #{head} { + append(handles, BG { ref.value; }); + append(handles, BG { ref.value + 1_i64; }); + } + a: Int64 = NEXT handles[0_i64]; + b: Int64 = NEXT handles[1_i64]; + RETURN; + END + CHT + + when :bg_stream_capture + <<~CHT + STRUCT Counter { value: Int64 } + + FN leak() RETURNS ~Int64[INF] -> + #{decl} + #{head} { + RETURN BG STREAM { + WHILE TRUE DO YIELD ref.value; END + }; + } + END + + FN main() RETURNS Void -> + s = leak(); + v: Int64 = NEXT s; + RETURN; + END + CHT + + when :takes_consume + <<~CHT + STRUCT Counter { value: Int64 } + + FN consume!(TAKES x: Counter) RETURNS !Int64 -> + RETURN x.value; + END + + FN main() RETURNS Void -> + #{decl} + #{head} { + v: Int64 = consume!(GIVE ref); + } + RETURN; + END + CHT + + when :store_field + <<~CHT + STRUCT Counter { value: Int64 } + STRUCT Holder { c: Counter } + + FN main() RETURNS Void -> + #{decl} + MUTABLE h = Holder{ c: Counter{ value: 0_i64 } }; + #{head} { + h.c = ref; + } + RETURN; + END + CHT + + when :list_append + <<~CHT + STRUCT Counter { value: Int64 } + + FN main() RETURNS Void -> + #{decl} + MUTABLE list: Counter[]@list = []; + #{head} { + list.append(ref); + } + RETURN; + END + CHT + end +end From 962d5a5f934d4a8a032b74f36f40f8267610b7bc Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:00:30 +0000 Subject: [PATCH 11/21] fix(mir): unify return-allocator across branches (INV-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function whose return type is non-Copy String/list/map hands its result to the caller, who expects to own it on a single allocator (heap, by convention — what the caller's cleanup defer assumes). Before this commit, escape analysis only marked the FUNCTION's return_provenance, never propagating that decision down into the individual return-position expressions. A function with mixed branches like RETURNS !String -> PARTIAL MATCH v START Value.Symbol AS s -> RETURN COPY s;, -- heap (dupe to heap) Value.List AS items -> RETURN out + ")"; -- frame (string concat) END would emit `concat(rt.frameAlloc(), ...)` for the List branch while the caller emits `defer rt.heapAlloc().free(returned_slice)`. INV-1 violation that surfaces as "Invalid free" under the debug allocator. Surfaced by examples/mal/interpreter.cht's `prStr` after the borrow-escape rejection landed: prStr was previously dodging this by returning borrows (no caller-side cleanup at all); once the borrow return was rejected and rewritten to `RETURN COPY s`, the mixed- allocator bug became live. Three pieces: 1. src/mir/escape_analysis.rb — new "Condition 9" in per_fn_scan!: if the function's return type (after `!T` unwrap) is a non-Copy String/list/map, walk every ReturnNode and call both `e2_promote_frame_concats!` (BinaryOp / StringConcat / list & struct literals) and `LoopFrameAnalysis.promote_value_to_heap!` (FuncCall / MethodCall / Identifier root resolution). 2. src/mir/control_flow.rb — `promote_value_to_heap!` for FuncCall/MethodCall now ALSO sets `node.storage = :heap` so the intrinsic dispatch's `:node_storage` allocator resolution produces `:heap` and the InlineZig substitutes `rt.heapAlloc()` for `{alloc}`. Previously it only set `heap_dupe_result`, which lower_intrinsic never reaches because intrinsics return early. 3. src/mir/mir_lowering.rb — `hoist_cleanup_entry` gains a case for allocating `MIR::InlineZig`, deriving the cleanup kind from the originating AST node's return type (string / list / non-Copy union). Without this, a heap-promoted intrinsic that triggers `mir_allocates?` would crash hoist_alloc with "unhandled allocating MIR node MIR::InlineZig". Effect on examples/mal/interpreter.cht: tests 1-57 now run correctly (arithmetic, def!/let*/if/do, comparison, list/count/ empty?/not, recursion, define/lambda/let/begin, set!, TCO, vector-ref, cons/car/cdr, quote). String-append (test ~58) still fails — separate INV-1 bug class around loop-carry strings whose declared allocator is heap but reassignments produce frame. Filed as a follow-up. No spec regressions: 4722 examples / 0 failures / 3 pending. Sorbet clean. All transpile-tests/*.cht produce valid Zig. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/mir/control_flow.rb | 6 ++++++ src/mir/escape_analysis.rb | 21 +++++++++++++++++++++ src/mir/mir_lowering.rb | 19 +++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/src/mir/control_flow.rb b/src/mir/control_flow.rb index c56fab55e..897d75857 100644 --- a/src/mir/control_flow.rb +++ b/src/mir/control_flow.rb @@ -1525,6 +1525,12 @@ def self.promote_value_to_heap!(node) when AST::FuncCall, AST::MethodCall node.heap_dupe_result = true ti.provenance = :heap + # For stdlib intrinsics dispatched via lower_intrinsic, the lowering + # never reaches the heap_dupe_result wrap (intrinsics return early). + # The intrinsic's `{alloc}` placeholder resolves via :node_storage, + # which reads node.storage. Setting it here lets the same MethodCall + # allocate from heap directly without the post-call dupe wrap. + node.storage = :heap if node.respond_to?(:storage=) when AST::Identifier # Identifier referencing a frame string: mark for heap dupe at the assignment # site rather than promoting the declaration (which would cause double-free diff --git a/src/mir/escape_analysis.rb b/src/mir/escape_analysis.rb index 7978ab7e2..e3cf118d8 100644 --- a/src/mir/escape_analysis.rb +++ b/src/mir/escape_analysis.rb @@ -381,6 +381,27 @@ def self.analyze!(fn_nodes, heap_fns:, promotion_plans: {}) end end + # ── Condition 9: RETURN-site allocator unification (INV-1) ── + # A function whose return type is non-Copy (String, list, map) hands + # its result to the caller, who expects to own it on a single allocator + # (heap, by convention — that's what the caller's cleanup defer + # assumes). Every return expression that is itself frame-allocating + # — `a + b` string concat, `n.toString()` intrinsic, `a + b + c` chain + # — must be promoted to heap. Otherwise the function's frame rewinds + # on return and the caller's slice header points into freed memory. + # See MAL `prStr`: mixed `RETURN COPY s` (heap) and `RETURN ` + # (frame) branches caused "Invalid free" under the debug allocator. + fn_ret_t = fn.return_type + fn_ret_t = fn_ret_t.is_a?(Type) ? fn_ret_t : (Type.new(fn_ret_t) rescue nil) + fn_ret_t = fn_ret_t.payload_type if fn_ret_t&.error_union? + if fn_ret_t && (fn_ret_t.string? || fn_ret_t.list_collection? || fn_ret_t.map?) + return_nodes.each do |ret| + next unless ret.value + e2_promote_frame_concats!(ret.value) + LoopFrameAnalysis.promote_value_to_heap!(ret.value) + end + end + { bg_upgraded: bg_upgraded, always_escaped: always_escaped, carry_return_vars: carry_ret_vars } end diff --git a/src/mir/mir_lowering.rb b/src/mir/mir_lowering.rb index 76fdd4328..65f75f70c 100644 --- a/src/mir/mir_lowering.rb +++ b/src/mir/mir_lowering.rb @@ -230,6 +230,25 @@ def hoist_cleanup_entry(mir, ast_node) return nil unless zig_t { kind: :non_copy_union, alloc: :heap, has_moved_guard: false, zig_type: zig_t } end + when MIR::InlineZig + # Allocating InlineZig (e.g. `intToString({alloc}, ...)` resolved to + # heap, or any other heap-allocating intrinsic). The cleanup kind is + # derived from the return type recorded on the originating AST node. + ti = Type.from_node(ast_node) + return nil unless ti + ti = ti.payload_type || ti if ti.error_union? + if ti.string? + { kind: :heap_string, alloc: :heap, has_moved_guard: false } + elsif ti.list_collection? || ti.array? + elem_ti = ti.element_type + elem_zig = (Type.new(elem_ti).zig_type rescue nil) + return nil unless elem_zig + { kind: :takes_slice, alloc: :heap, has_moved_guard: false, elem_zig_type: elem_zig } + else + zig_t = (Type.new(ti.resolved).zig_type rescue nil) + return nil unless zig_t + { kind: :non_copy_union, alloc: :heap, has_moved_guard: false, zig_type: zig_t } + end else raise "hoist_cleanup_entry: unhandled allocating MIR node #{mir.class} -- " \ "mir_allocates? returned true but no cleanup entry is defined. Add a case." From 8ee727689ef359e8a0dcb181fcbce20e92f47aed Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:03:03 +0000 Subject: [PATCH 12/21] docs(mal): file language-level bugs surfaced by MAL bring-up Three bugs fixed in this PR (universal borrow-escape; mixed-allocator returns / INV-1; allocating InlineZig in hoist_cleanup_entry); two filed for follow-up: 4. Loop-carry frame->heap binding: a heap-declared binding (e.g. `MUTABLE out = getStr(...)`) reassigned from a frame string-concat (`out = out + getStr(...)`) violates INV-1 at scope-end cleanup. The existing loop_carry_string rule only fires for loops with mark_per_iter; this loop body has no frame allocs, so Cond 5 is silent. MAL stops at test ~58 (string-append) on this. 5. Universal borrow-escape misses plain assignment: the same check that catches `RETURN borrow.field` and `outer.append(borrow.field)` should catch `x = borrow.field` when `x`'s storage is heap. Today the user must add `COPY` manually (see eval's `ast = COPY tco.tcoAst`). The audit rule applied: `COPY` added at borrow-return sites is correct language semantics, not a workaround. A workaround is anything that hides a compiler bug behind MAL-level code. None of the MAL changes in PR #47 fall in that category. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/mal-bugs.md | 172 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 docs/agents/mal-bugs.md diff --git a/docs/agents/mal-bugs.md b/docs/agents/mal-bugs.md new file mode 100644 index 000000000..fcc04812d --- /dev/null +++ b/docs/agents/mal-bugs.md @@ -0,0 +1,172 @@ +# Language-level bugs surfaced by `examples/mal/interpreter.cht` + +Bugs found while bringing MAL up under the universal borrow-escape rule +(PR #47 / postmortem 383). Each entry distinguishes between **fixed** +(in this PR or referenced commit), **filed** (real CLEAR/MIR bug, not +yet addressed), and the **MAL change** made (so we can confirm none of +them are unsafe workarounds). + +The audit rule we follow: **if MAL had to add `COPY` or restructure +because the compiler currently rejects unsafe code, that is correct +language semantics, not a workaround.** A workaround is anything that +hides a compiler bug behind MAL-level code. + +## 1. UAF on `MATCH v AS s -> RETURN s;` for non-Copy payloads — FIXED + +`prStr`'s `Value.Symbol AS s -> RETURN s;` (and the same shape across +`getStr`, `getSymName`, `getErrMsg`, `getErrKind`) returned a slice +into a caller-owned `Value` whose cleanup-defer fired before the +function actually returned. The slice the caller received pointed +into freed memory. Symptom: `[]` printed as `[ÿÿ]` or segfault. + +**Fix (compiler):** Universal borrow-escape rule. Every borrow- +introducing pattern (MATCH-AS, IF-AS, WHILE-AS, FOR-EACH, struct +destructure in IF/MATCH, pipeline `AS $name`) marks the binding +`non_escaping` AND `borrowed_alias` when the payload is non-Copy. +`visit_ReturnNode`'s WITH-only gate is lifted; check fires +universally. Postmortem: `docs/postmortems/383_uaf_match_as_borrow_returned.md`. +Repro: `transpile-tests/_xfail/383_uaf_match_as_borrow_returned.cht`. + +**MAL change:** added `COPY` at every borrowed-payload return site — +correct language semantics, not a workaround. The compiler now rejects +the original unsafe code with a clear diagnostic. + +## 2. UAF on `ast = tco.tcoAst` (deep extraction from a borrow) — FIXED + +`eval`'s TCO loop did `ast = tco.tcoAst;` where `tco` was a +`Value.Tco AS tco` borrow. Without `COPY`, `ast` aliased data inside +`result` (the Tco-carrying Value); when the next iteration freed +`result`, `ast` dangled and the next dispatch read freed memory. +Symptom: panic "Second free" / "Invalid free" inside `eval`. + +**Fix (MAL):** `ast = COPY tco.tcoAst;`. Correct language semantics. +The deep-extract via `.tcoAst` follows the same rule as a direct +MATCH-AS borrow. + +**Compiler:** the universal borrow-escape rule should ALSO catch +this when the destination is a heap-storage local (since `ast` is +declared from a heap-typed expression). Today it does not — the +check fires at RETURN sites and container-store sites, not at every +assignment. Filing as #5 below. + +## 3. Mixed-allocator returns from a single function (INV-1) — FIXED + +`prStr` had `RETURN COPY s` (heap) on some branches and `RETURN out ++ ")"` (frame string-concat) on others. The caller's +`defer rt.heapAlloc().free(returned_slice)` fired against frame +memory: "Invalid free" panic. Surfaced *because* the borrow-escape +fix turned every implicit borrow-return into an explicit COPY-return, +forcing the function's return type to be heap-uniform. + +**Fix (compiler):** new escape-analysis Condition 9 in +`src/mir/escape_analysis.rb`. When a function's return type (after +unwrapping `!T`) is non-Copy String/list/map, every `ReturnNode`'s +value runs through both `e2_promote_frame_concats!` (BinaryOp / +StringConcat / list & struct literals) and +`LoopFrameAnalysis.promote_value_to_heap!` (FuncCall / MethodCall / +Identifier root). Two supporting fixes: + +- `src/mir/control_flow.rb`: `promote_value_to_heap!` for + FuncCall/MethodCall now also sets `node.storage = :heap` so the + intrinsic dispatch's `:node_storage` allocator resolution picks + heap. Without this, intrinsics short-circuit through + `lower_intrinsic` before the `heap_dupe_result` wrap could fire. +- `src/mir/mir_lowering.rb`: `hoist_cleanup_entry` gains a + `MIR::InlineZig` case so a heap-promoted intrinsic that triggers + `mir_allocates?` is correctly cleaned up. Without this, + hoist_alloc would crash with "unhandled allocating MIR node". + +**MAL change:** none — the compiler now produces correct code. + +## 4. Loop-carry string with frame reassignment to a heap-declared + binding (INV-1) — FILED, NOT FIXED + +`applyNative`'s string-append branch: + +```clear +IF id == 26 THEN + MUTABLE out = getStr(evaled[1]); # out is heap (getStr now returns heap) + FOR si IN (2_i64 ..< evaled.length()) DO + out = out + getStr(evaled[si]); # frame concat assigned to heap-binding + END + RETURN Value{ Str: COPY out }; +END +``` + +Symptom: `Invalid free` in `applyNative`'s scope-end cleanup of +`out`. The binding's declared allocator is heap (it was initialized +from a heap-returning fn), but the latest value in `out` is a frame +string-concat. The cleanup defer uses `heapAlloc.free(...)` against +frame memory. + +**Why `loop_carry_string` (Cond 5) doesn't catch it:** Cond 5 +promotes carry-string reassignments only inside loops where +`mark_per_iter` will fire (i.e., loops that themselves contain +frame-allocating decls). The string-append loop body has no +frame-allocating decls of its own; just the reassignment. So the +loop has no per-iter mark, Cond 5 doesn't run, and the frame +concat reaches the heap binding. + +**Right compiler fix sketch:** the rule should be allocator-binding +not loop-shape: any reassignment of a binding with `storage = :heap` +must produce a heap value. If the RHS is a frame concat / frame +intrinsic, promote it to heap (mirroring Cond 9 but at every +assignment site, not just RETURNs). The existing +`promote_value_to_heap!` already knows how to do the promotion; +what's missing is the trigger condition. + +**Wider blast radius:** the same shape exists at MAL line 128-133 +(`prStr`'s List branch) — `MUTABLE out = "("; FOR ... DO out = out ++ ...; END RETURN out + ")";`. That one happens to work today +because the function's return type is heap-uniform after Cond 9, so +the RETURN's `out + ")"` is heap-promoted. But the intermediate +reassignments are still frame; if anything in the loop body looks +at `out` after reassignment (it doesn't here), it would also be +unsafe. The same root cause as test 26. + +**MAL change:** none. Workaround would be `out = COPY (out + +getStr(...))` — explicit heap dupe per iteration. Not applied +because that hides the compiler bug. + +**Status:** MAL stops at test ~58 (string-append) until this is +fixed. Tests 1-57 pass after the Cond 9 + InlineZig + storage fixes. + +## 5. Universal borrow-escape doesn't fire on plain assignment — FILED + +The bug we hit before adding `COPY tco.tcoAst` (#2): assigning +`ast = tco.tcoAst` where `tco` is a non-takes MATCH-AS borrow +should have been caught by the same check that catches `RETURN +tco.tcoAst` and `outer.append(tco.tcoAst)`. + +`ensure_owned_value!` covers the container-store side. There is no +analogous check for *plain* `var = borrow.field` assignment; the +binding's prior cleanup fires later and the new value's lifetime is +shorter than the binding's, just like the RETURN case. + +**Right fix:** extend the universal rule. Any plain assignment +`x = E` where `E`'s root is non_escaping should reject (or insert +an implicit COPY) when `x`'s storage is heap (or escapes the +current scope). Today the user has to spot the borrow manually. + +**MAL change:** added `COPY` at the assignment site (#2 above) — +correct semantics for now, but the compiler should enforce it. + +## 6. (Tracking only) Stack-frame size for deep recursion + +Not a bug, but worth recording. `clear build --stack-check` on MAL +reports `evalList` (24 KB/frame), `applyNative` (12.5 KB/frame), +and `eval` (1.7 KB/frame), with several functions flagged +`unbounded` because of `EFFECTS REENTRANT`. The stack-verifier +auto-sizes the main fiber to LARGE; runtime works. Recorded here in +case future MAL changes push past LARGE. + +## Summary + +| # | Bug | Status | MAL workaround? | +|---|---|---|---| +| 1 | MATCH-AS borrow returned from fn | Fixed (compiler) | No — `COPY` is correct semantics | +| 2 | Borrow-deep-extract via `.tcoAst` | Fixed (MAL `COPY`) | No — correct semantics; #5 covers it long-term | +| 3 | Mixed-allocator returns | Fixed (compiler Cond 9 + InlineZig + storage) | No | +| 4 | Loop-carry frame->heap binding | **Filed, not fixed** | No — workaround would be `out = COPY (out + ...)` | +| 5 | Borrow-escape on plain `=` | **Filed, not fixed** | Worked around at #2; should be a compiler check | +| 6 | Stack-size note | n/a | n/a | From 6adb42eb2b5f21500eb6b0be5cbde304528cad92 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:07:58 +0000 Subject: [PATCH 13/21] fix(mir): assignment-site allocator unification (INV-1, broad) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loop-carry-string allocator mismatch: a binding declared with storage=:heap (initialized from a heap-returning fn) reassigned from a frame string-concat would store frame memory in a heap-tagged binding. The cleanup defer at scope-end then frees frame slice via heapAlloc.free — INV-1 violation, "Invalid free" under the debug allocator. Surfaced by examples/mal/interpreter.cht's `applyNative` string-append branch: IF id == 26 THEN MUTABLE out = getStr(evaled[1]); # heap (getStr returns heap) FOR si IN (...) DO out = out + getStr(evaled[si]); # frame concat -> heap binding END RETURN Value{ Str: COPY out }; END Existing Cond 5 (loop_carry_string) handled a subset: same shape but gated on loops with `mark_per_iter`. The string-append loop body has no frame-allocating decls of its own, so Cond 5 was silent. New Cond 10 in escape_analysis.rb#per_fn_scan!: 1. Walk decls, collect names declared with storage=:heap (or whose type already has heap_provenance) and a non-Copy String/list/map type. 2. Walk assignments. For each `name = value` where `name` is in the heap_string_decls set, run `e2_promote_frame_concats!` and `LoopFrameAnalysis.promote_value_to_heap!` on the value. This is the broader sibling of Cond 9 (RETURN-site unification): same underlying rule (binding allocator must be uniform across all stores into it), applied at every assignment instead of only at returns. Effect on examples/mal/interpreter.cht: ALL 75 INTERPRETER TESTS PASSED. (The debug allocator still reports leaks — separate bug class about COPY-at-return cleanup matching, filed in mal-bugs.md.) No spec regressions: 4722 examples / 0 failures / 3 pending. Sorbet clean. All transpile-tests/*.cht produce valid Zig. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/mir/escape_analysis.rb | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/mir/escape_analysis.rb b/src/mir/escape_analysis.rb index e3cf118d8..9468f61d8 100644 --- a/src/mir/escape_analysis.rb +++ b/src/mir/escape_analysis.rb @@ -402,6 +402,38 @@ def self.analyze!(fn_nodes, heap_fns:, promotion_plans: {}) end end + # ── Condition 10: ASSIGNMENT-site allocator unification (INV-1) ── + # A binding declared with storage=:heap (initialized from a heap-returning + # fn or explicitly declared on heap) MUST keep its allocator across every + # reassignment. If the RHS of `name = expr` is frame-allocating + # (BinaryOp string-concat, frame intrinsic), the cleanup defer at + # scope-end frees the latest value with heapAlloc.free against a frame + # slice — INV-1 violation surfaces as "Invalid free" under debug + # allocator (see MAL `applyNative` string-append branch). + # + # Existing Cond 5 (`loop_carry_string`) handles a subset: same shape + # inside a loop that has `mark_per_iter`. This is the broader rule — + # apply at every assignment, gated on the binding's declared storage + # rather than loop shape. + heap_string_decls = Set.new + AST.walk_body(fn.body) do |node| + next unless node.is_a?(AST::VarDecl) || (node.is_a?(AST::BindExpr) && node.mode == :decl) + next unless node.name.is_a?(String) + ti = node.type_info rescue nil + ti = ti.is_a?(Type) ? ti : (ti ? (Type.new(ti) rescue nil) : nil) + next unless ti && (ti.string? || ti.list_collection? || ti.map?) + decl_storage = node.respond_to?(:storage) ? node.storage : nil + heap_string_decls << node.name if decl_storage == :heap || ti.heap_provenance? + end + if heap_string_decls.any? + AST.walk_body(fn.body) do |node| + next unless node.is_a?(AST::BindExpr) && node.mode == :assign + next unless node.name.is_a?(String) && heap_string_decls.include?(node.name) + e2_promote_frame_concats!(node.value) + LoopFrameAnalysis.promote_value_to_heap!(node.value) + end + end + { bg_upgraded: bg_upgraded, always_escaped: always_escaped, carry_return_vars: carry_ret_vars } end From 9efddf05809b4cb60e39962b2c4b82403a84e7cd Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:08:22 +0000 Subject: [PATCH 14/21] feat(fuzz): add polymorphic_sync_admission template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies which (callee signature × caller binding) tuples the annotator admits today. 6 callee forms × 6 caller bindings = 36 cells. Callees: concrete, SHARED Counter (parametric), REQUIRES c: LOCKED, REQUIRES c: VERSIONED, REQUIRES c: LOCAL, REQUIRES c: LOCKED | LOCAL. Callers: @locked, @writeLocked, @versioned, @local, @multiowned, plain. Findings on current tree (1 UNEXPECTED-PASS + 12 MIR-FAIL): - (concrete, @local) UNEXPECTED-PASS: concrete callee admits @local despite docs/sharing-capabilities.md stating concrete admits plain T only. The viralization-risk surface — @local is structurally a *T pointer so admission rules treat it like a plain local. - SHARED T param rejects @locked / @writeLocked / @versioned short forms (3 cells) — short forms don't coerce to @shared:* at call sites. Test 349 uses the explicit @shared:locked form throughout. - WITH MATCH syntax not parsed — "Unknown WITH capability 'MATCH'" (5 cells, all REQUIRES LOCKED | LOCAL). CLAUDE.md describes the syntax; parser doesn't accept it yet. - Codegen failures for several legitimately-admitted cells (req_locked + @locked, req_versioned + @versioned, req_local + @local) — pointer-deref / error-union-ignored Zig errors. Same patterns test 349 uses successfully with full @shared:locked form; short-form callers trip a different lowering path. Per branch policy: matrix surfaces; fixes land elsewhere. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/fuzz/README.md | 57 ++++++ .../templates/polymorphic_sync_admission.rb | 170 ++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 tools/fuzz/templates/polymorphic_sync_admission.rb diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index a7aada541..e4e2dd18d 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -46,6 +46,7 @@ cell into a complete .cht source string with embedded `ASSERT` oracles. | `stream_into_boundary` | 48 (+18 in_dev) | NEXT value passed across BG / DO / BG STREAM boundary, all sync wrappers | | `lifetimed_return` | 6 (+12 in_dev) | BG handle escape rejection — exercises bg_lifetime_sources stamping | | `access_gate` | 50 | WITH-alias escape rules — 5 alias-perm tuples × 10 patterns | +| `polymorphic_sync_admission`| 36 | Which (callee × caller binding) tuples are admitted | ### `stream_into_boundary` matrix @@ -135,6 +136,62 @@ EXCLUSIVE alias" stops at annotation and never observes the codegen- level type mismatch. The matrix end-to-end run does. This is exactly the gap the harness was added for. +### `polymorphic_sync_admission` matrix + +Verifies which (callee signature × caller binding) combinations the +annotator admits. Cross-references the `LOCAL` family viralization +concern: should `LOCAL` stay admissible alongside `LOCKED` in the same +REQUIRES clause? + +6 callee forms × 6 caller bindings = **36 cells**. + +Callee forms: + +- `:concrete` — `FN tick!(MUTABLE c: Counter) RETURNS Void` +- `:shared_param` — `FN tick!(MUTABLE c: SHARED Counter) RETURNS Void` +- `:req_locked` — `REQUIRES c: LOCKED`, body `WITH POLYMORPHIC EXCLUSIVE` +- `:req_versioned` — `REQUIRES c: VERSIONED`, body `WITH SNAPSHOT ... ON MvccConflict RAISE` +- `:req_local` — `REQUIRES c: LOCAL`, body `WITH POLYMORPHIC c` +- `:req_locked_or_local` — `REQUIRES c: LOCKED | LOCAL`, body `WITH MATCH` + +Caller bindings: `@locked`, `@writeLocked`, `@versioned`, `@local`, +`@multiowned`, plain. + +Expected admissions per docs/sharing-capabilities.md: + +| Callee | Admits | +|---|---| +| `:concrete` | plain only | +| `:shared_param` | locked, writeLocked, versioned (any `@shared:*`) | +| `:req_locked` | locked, writeLocked | +| `:req_versioned` | versioned | +| `:req_local` | local, multiowned, plain | +| `:req_locked_or_local` | union of LOCKED + LOCAL | + +**Findings on the current tree** (1 UNEXPECTED-PASS + 12 MIR-FAILs): + +- `(concrete, @local)` UNEXPECTED-PASS — concrete callee accepts `@local`. + Per docs it should only accept plain `T`. Either documentation gap or + admission too lax. This is the canonical viralization-risk surface from + the language design discussion. +- `SHARED T` rejects `@locked` / `@writeLocked` / `@versioned` short + forms (3 cells) — short forms don't coerce to `@shared:*` at call sites. + Test 349 uses the explicit `@shared:locked` form which works. +- `WITH MATCH` syntax not parsed — "Unknown WITH capability 'MATCH'" + (5 cells, all `:req_locked_or_local`). CLAUDE.md describes this; parser + doesn't accept it yet. +- Codegen issues for some legitimately-admitted cells (`req_locked + + @locked`, `req_versioned + @versioned`, `req_local + @local`) — + pointer-deref / error-union-ignored Zig errors. These are the same + patterns test 349 uses successfully with `@shared:locked` full form; + short forms (`@locked`) trip a different lowering path. + +The unit specs (`spec/sync_polymorphism_integration_spec.rb`, +`polymorphic_transaction_acceptance_spec.rb`) verify dispatch path +selection via Zig string-grep. They don't observe these end-to-end +codegen failures because they stop at annotation or at string-grep +of the emitted Zig. + ### `lifetimed_return` matrix Verifies `bg_lifetime_sources` stamping (`src/annotator.rb:6449`) translates diff --git a/tools/fuzz/templates/polymorphic_sync_admission.rb b/tools/fuzz/templates/polymorphic_sync_admission.rb new file mode 100644 index 000000000..64cb12131 --- /dev/null +++ b/tools/fuzz/templates/polymorphic_sync_admission.rb @@ -0,0 +1,170 @@ +# Template: polymorphic-sync admission rules. +# Verifies which (callee signature × caller binding) combinations the +# annotator admits today vs rejects. Surfaces the viralization concern +# from the access-gate / @local discussion: should `LOCAL` be admissible +# alongside `LOCKED` in a polymorphic-sync function's REQUIRES clause? +# +# Cross-references: +# - docs/sharing-capabilities.md (the canonical Types/Capabilities/ +# Boundaries model) +# - src/ast/parser.rb:1546 REQUIRES_VALID_FAMILIES +# - transpile-tests/349_polymorphic_transaction_acceptance.cht +# (canonical end-to-end pattern) +# - spec/sync_polymorphism_integration_spec.rb / +# polymorphic_transaction_acceptance_spec.rb (annotator-level coverage) +# +# Cell shape: +# { callee:, caller:, expected: } +# +# callee ∈ { +# :concrete, # FN foo(c: Counter) — no REQUIRES, plain param +# :shared_param, # FN foo(c: SHARED Counter) — parametric polymorphism +# :req_locked, # REQUIRES c: LOCKED +# :req_versioned, # REQUIRES c: VERSIONED +# :req_local, # REQUIRES c: LOCAL +# :req_locked_or_local, # REQUIRES c: LOCKED | LOCAL — viralization risk +# } +# +# caller ∈ { +# :locked, # MUTABLE c = Counter{...} @locked +# :write_locked, # ... @writeLocked +# :versioned, # ... @versioned +# :local, # ... @local +# :multiowned, # ... @multiowned +# :plain, # MUTABLE c = Counter{...} (no perm) +# } +# +# Expected outcomes per docs/sharing-capabilities.md: +# - LOCKED admits @locked, @writeLocked +# - VERSIONED admits @versioned +# - LOCAL admits @local, @multiowned, plain +# - LOCKED|LOCAL admits union of LOCKED + LOCAL +# - SHARED Counter admits any @shared:* variant (locked, writeLocked, +# versioned, atomic) — NOT @local, NOT @multiowned, NOT plain +# - Concrete (no REQUIRES) admits plain only +# +# A cell that UNEXPECTED-PASSes is admission too lax. A :pass cell that +# fails compile is admission too strict. + +POLYMORPHIC_ADMISSION_CELLS = [] + +CALLEE_FORMS = [:concrete, :shared_param, :req_locked, :req_versioned, :req_local, :req_locked_or_local] +CALLER_BINDINGS = [:locked, :write_locked, :versioned, :local, :multiowned, :plain] + +ADMITS = { + concrete: [:plain], + shared_param: [:locked, :write_locked, :versioned], + req_locked: [:locked, :write_locked], + req_versioned: [:versioned], + req_local: [:local, :multiowned, :plain], + req_locked_or_local: [:locked, :write_locked, :local, :multiowned, :plain], +} + +CALLEE_FORMS.each do |callee| + CALLER_BINDINGS.each do |caller| + cell = { callee: callee, caller: caller } + cell[:expected] = ADMITS[callee].include?(caller) ? :pass : :compile_error + POLYMORPHIC_ADMISSION_CELLS << cell + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +def admission_callee_def(callee) + body_locked = "WITH POLYMORPHIC EXCLUSIVE c AS x { x.value = x.value + 1_i64; }" + body_versioned = "WITH SNAPSHOT c AS MUTABLE x { x.value = x.value + 1_i64; } ON MvccConflict RAISE" + body_local = "WITH POLYMORPHIC c AS x { x.value = x.value + 1_i64; }" + body_match = <<~CHT.chomp + WITH MATCH c + WHEN @locked -> EXCLUSIVE c AS x { x.value = x.value + 1_i64; } + WHEN @writeLocked -> EXCLUSIVE c AS x { x.value = x.value + 1_i64; } + WHEN @local -> c.value = c.value + 1_i64; + WHEN @multiowned -> c.value = c.value + 1_i64; + WHEN PLAIN -> c.value = c.value + 1_i64; + END + CHT + + # Match the patterns from transpile-tests/349_polymorphic_transaction_acceptance.cht: + # - LOCKED/LOCAL/concrete: RETURNS Void (default sync policy handles LockTimeout) + # - VERSIONED/ATOMIC: RETURNS !Void (MvccConflict surfaces explicitly) + case callee + when :concrete + <<~CHT.chomp + FN tick!(MUTABLE c: Counter) RETURNS Void -> + c.value = c.value + 1_i64; + RETURN; + END + CHT + when :shared_param + <<~CHT.chomp + FN tick!(MUTABLE c: SHARED Counter) RETURNS Void -> + #{body_locked} + RETURN; + END + CHT + when :req_locked + <<~CHT.chomp + FN tick!(MUTABLE c: Counter) RETURNS Void + REQUIRES c: LOCKED + -> + #{body_locked} + RETURN; + END + CHT + when :req_versioned + <<~CHT.chomp + FN tick!(MUTABLE c: Counter) RETURNS !Void + REQUIRES c: VERSIONED + -> + #{body_versioned} + RETURN; + END + CHT + when :req_local + <<~CHT.chomp + FN tick!(MUTABLE c: Counter) RETURNS Void + REQUIRES c: LOCAL + -> + #{body_local} + RETURN; + END + CHT + when :req_locked_or_local + <<~CHT.chomp + FN tick!(MUTABLE c: Counter) RETURNS Void + REQUIRES c: LOCKED | LOCAL + -> + #{body_match} + RETURN; + END + CHT + end +end + +def admission_caller_decl(caller) + case caller + when :locked then "MUTABLE c = Counter{ value: 0_i64 } @locked;" + when :write_locked then "MUTABLE c = Counter{ value: 0_i64 } @writeLocked;" + when :versioned then "MUTABLE c = Counter{ value: 0_i64 } @versioned;" + when :local then "MUTABLE c = Counter{ value: 0_i64 } @local;" + when :multiowned then "MUTABLE c = Counter{ value: 0_i64 } @multiowned;" + when :plain then "MUTABLE c = Counter{ value: 0_i64 };" + end +end + +FuzzGenerator.register(:polymorphic_sync_admission, cells: POLYMORPHIC_ADMISSION_CELLS) do |p| + callee_def = admission_callee_def(p[:callee]) + caller_decl = admission_caller_decl(p[:caller]) + + <<~CHT + STRUCT Counter { value: Int64 } + + #{callee_def} + + FN main() RETURNS Void -> + #{caller_decl} + tick!(c); + RETURN; + END + CHT +end From 5e9a650cd3fb4aba3109b108d4082100394dcd4c Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:26:09 +0000 Subject: [PATCH 15/21] docs(mal): mark Cond 10 fixed, file the leak follow-up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup after the assignment-site allocator unification landed: - src/mir/escape_analysis.rb: drop the STDERR debug instrumentation used to verify Cond 10 fires on applyNative::out and friends. - docs/agents/mal-bugs.md: bug #4 (loop-carry frame->heap binding) marked fixed; new bug #7 filed for the 72 leaks the debug allocator reports after all 75 MAL tests pass — sample trace points to the COPY-at-borrow-return path in getStr. MAL state at end of this PR: - All 75 interpreter tests PASS - 4722 spec examples / 0 failures / 3 pending - Sorbet clean - 72 DebugAllocator leaks (filed, not addressed) Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/mal-bugs.md | 67 ++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/docs/agents/mal-bugs.md b/docs/agents/mal-bugs.md index fcc04812d..e9835ec40 100644 --- a/docs/agents/mal-bugs.md +++ b/docs/agents/mal-bugs.md @@ -79,7 +79,7 @@ Identifier root). Two supporting fixes: **MAL change:** none — the compiler now produces correct code. ## 4. Loop-carry string with frame reassignment to a heap-declared - binding (INV-1) — FILED, NOT FIXED + binding (INV-1) — FIXED `applyNative`'s string-append branch: @@ -107,29 +107,25 @@ frame-allocating decls of its own; just the reassignment. So the loop has no per-iter mark, Cond 5 doesn't run, and the frame concat reaches the heap binding. -**Right compiler fix sketch:** the rule should be allocator-binding -not loop-shape: any reassignment of a binding with `storage = :heap` -must produce a heap value. If the RHS is a frame concat / frame -intrinsic, promote it to heap (mirroring Cond 9 but at every -assignment site, not just RETURNs). The existing -`promote_value_to_heap!` already knows how to do the promotion; -what's missing is the trigger condition. - -**Wider blast radius:** the same shape exists at MAL line 128-133 -(`prStr`'s List branch) — `MUTABLE out = "("; FOR ... DO out = out -+ ...; END RETURN out + ")";`. That one happens to work today -because the function's return type is heap-uniform after Cond 9, so -the RETURN's `out + ")"` is heap-promoted. But the intermediate -reassignments are still frame; if anything in the loop body looks -at `out` after reassignment (it doesn't here), it would also be -unsafe. The same root cause as test 26. - -**MAL change:** none. Workaround would be `out = COPY (out + -getStr(...))` — explicit heap dupe per iteration. Not applied -because that hides the compiler bug. - -**Status:** MAL stops at test ~58 (string-append) until this is -fixed. Tests 1-57 pass after the Cond 9 + InlineZig + storage fixes. +**Fix (compiler):** new Cond 10 in +`src/mir/escape_analysis.rb#per_fn_scan!`. Walks decls, collects +names whose declared storage is `:heap` (or whose type already has +heap_provenance) and whose type is non-Copy String/list/map. Then +walks assignments: for each `name = value` where `name` is in the +heap_string_decls set, runs both `e2_promote_frame_concats!` (for +BinaryOp/StringConcat) and `LoopFrameAnalysis.promote_value_to_heap!` +(for FuncCall/MethodCall). The rule is allocator-binding, not +loop-shape — applies at every assignment, not just inside loops +with mark_per_iter. + +This is the broader sibling of Cond 9: same underlying rule +(binding allocator must be uniform across all stores into it), +applied at every assignment instead of only at returns. + +**MAL change:** none. The compiler now produces correct code. + +**Status:** **All 75 MAL interpreter tests now PASS** at runtime. +Remaining issue is leaks (#7 below), not correctness. ## 5. Universal borrow-escape doesn't fire on plain assignment — FILED @@ -151,6 +147,26 @@ current scope). Today the user has to spot the borrow manually. **MAL change:** added `COPY` at the assignment site (#2 above) — correct semantics for now, but the compiler should enforce it. +## 7. 72 leaks under DebugAllocator on MAL test run — FILED + +After all 75 MAL interpreter tests pass, the debug allocator reports +72 leaks. Sample trace points to `getStr` line 155 +(`rt.heapAlloc().dupe(u8, s)`) — the heap dupe at the COPY-of- +borrow return site. Some callers of `getStr` (and `getSymName`, +`getErrMsg`, `getErrKind`) do not match the heap allocation with a +free. + +Hypothesis: the COPY-at-RETURN-of-non-Copy-borrow path emits the +heap dupe but the caller's binding cleanup is sometimes suppressed +incorrectly (e.g. when the value flows directly into another call +without an intermediate VarDecl). Needs a focused leak-checker +audit against MAL's call graph. + +**MAL change:** none planned. Compiler bug. + +**Status:** filed for follow-up. Tests pass; the leaks are reported +by the debug allocator only, not by the standard runtime. + ## 6. (Tracking only) Stack-frame size for deep recursion Not a bug, but worth recording. `clear build --stack-check` on MAL @@ -167,6 +183,7 @@ case future MAL changes push past LARGE. | 1 | MATCH-AS borrow returned from fn | Fixed (compiler) | No — `COPY` is correct semantics | | 2 | Borrow-deep-extract via `.tcoAst` | Fixed (MAL `COPY`) | No — correct semantics; #5 covers it long-term | | 3 | Mixed-allocator returns | Fixed (compiler Cond 9 + InlineZig + storage) | No | -| 4 | Loop-carry frame->heap binding | **Filed, not fixed** | No — workaround would be `out = COPY (out + ...)` | +| 4 | Loop-carry frame->heap binding | Fixed (compiler Cond 10) | No | | 5 | Borrow-escape on plain `=` | **Filed, not fixed** | Worked around at #2; should be a compiler check | +| 7 | 72 leaks under DebugAllocator | **Filed, not fixed** | No | | 6 | Stack-size note | n/a | n/a | From 60c330913ec09d89c2317fac96167daf840e56af Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 13:30:15 +0000 Subject: [PATCH 16/21] =?UTF-8?q?feat(fuzz):=20add=20execution=5Fboundary?= =?UTF-8?q?=20template=20=E2=80=94=20modifier=20=C3=97=20ownership?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies the modifier × ownership rules from src/ast/diagnostic_registry.rb: what can / can't cross BG / DO / BG STREAM with @parallel or @pinned. Cross-product: - boundary ∈ {bg, do, bg_stream} - modifier ∈ {none, @parallel, @pinned} - ownership ∈ {@local, @shared:locked, @multiowned} = 27 cells. Findings on the current tree: Solid enforcement: - (any boundary, @parallel, @local) → correctly rejected (3/3 cells) - (any boundary, @parallel, @multiowned) → correctly rejected (3/3 cells) - All BG cells pass (none / @parallel / @pinned × all 3 ownerships) Gaps surfaced (8 cells fail end-to-end): - DO + @local / DO + @multiowned (modifier none or @pinned, 4 cells): DO branches lower to inner Zig fns that don't close over outer @local / @multiowned bindings. The existing test corpus only uses DO with @shared:locked state. Either DO should learn to capture these, or docs should clarify that DO requires @shared. - BG STREAM + @parallel / BG STREAM + @pinned (4 cells): the BG STREAM parser has no equivalent of parse_bg_prefix — modifier sigils inside the stream body don't parse. Inconsistent with BG. The @parallel-with-@local and @parallel-with-@multiowned diagnostics fire correctly across all 3 boundaries — the canonical boundary- crossing rules from sharing-capabilities.md are enforced. Combined matrix: 203 active cells across 9 templates, 25 distinct findings (1 leak + 21 mir-error + 3 unexpected-pass). Per branch policy: matrix surfaces; fixes land elsewhere. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/fuzz/README.md | 38 +++++ tools/fuzz/templates/execution_boundary.rb | 162 +++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 tools/fuzz/templates/execution_boundary.rb diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index e4e2dd18d..0926a61fe 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -47,6 +47,7 @@ cell into a complete .cht source string with embedded `ASSERT` oracles. | `lifetimed_return` | 6 (+12 in_dev) | BG handle escape rejection — exercises bg_lifetime_sources stamping | | `access_gate` | 50 | WITH-alias escape rules — 5 alias-perm tuples × 10 patterns | | `polymorphic_sync_admission`| 36 | Which (callee × caller binding) tuples are admitted | +| `execution_boundary` | 27 | What can / can't cross BG / DO / BG STREAM × @parallel / @pinned | ### `stream_into_boundary` matrix @@ -136,6 +137,43 @@ EXCLUSIVE alias" stops at annotation and never observes the codegen- level type mismatch. The matrix end-to-end run does. This is exactly the gap the harness was added for. +### `execution_boundary` matrix + +Verifies the modifier × ownership rules from `src/ast/diagnostic_registry.rb`. +Cross-product: + +- `boundary` ∈ {bg, do, bg_stream} +- `modifier` ∈ {none, @parallel, @pinned} +- `ownership` ∈ {@local, @shared:locked, @multiowned} + += **27 cells**. + +Expected (per docs/sharing-capabilities.md + diagnostic registry): + +- `(any boundary, @parallel, @local)` → reject ("@local variable cannot + be used in @parallel block") +- `(any boundary, @parallel, @multiowned)` → reject ("@multiowned (Rc) + variable cannot be used in @parallel block") +- `(any boundary, @parallel, @shared:locked)` → accept +- All `(none | @pinned)` cells → accept + +**Findings on the current tree** (8 cells fail end-to-end): + +- `DO + @local` and `DO + @multiowned` (modifier none or @pinned, 4 cells): + DO branches lower to inner Zig fns that don't close over outer @local / + @multiowned bindings. Existing test corpus only uses DO with + `@shared:locked` state. Either DO should learn to capture these, or the + docs should clarify that DO requires @shared. +- `BG STREAM + @parallel` and `BG STREAM + @pinned` (4 cells): the BG + STREAM parser has no equivalent of `parse_bg_prefix` — modifier sigils + inside the stream body don't parse. Inconsistent with BG which does + accept them. Either BG STREAM should accept modifiers OR the + diagnostic registry should document the limitation. + +The @parallel-with-@local and @parallel-with-@multiowned rejections fire +correctly across all 3 boundaries — diagnostic enforcement is solid for +those rules. + ### `polymorphic_sync_admission` matrix Verifies which (callee signature × caller binding) combinations the diff --git a/tools/fuzz/templates/execution_boundary.rb b/tools/fuzz/templates/execution_boundary.rb new file mode 100644 index 000000000..07410c7a1 --- /dev/null +++ b/tools/fuzz/templates/execution_boundary.rb @@ -0,0 +1,162 @@ +# Template: execution-boundary admission rules. +# What can / can't cross BG / DO / BG STREAM with and without @parallel +# (or @pinned)? +# +# This is the broader matrix from formal-verification-testing.md TODO #2, +# focused on the modifier × ownership cross-product. The narrower +# stream_into_boundary template covers move-mode + sync-wrapper variation; +# this one is about the modifier rules from src/ast/diagnostic_registry.rb. +# +# Diagnostic rules under test (per src/ast/diagnostic_registry.rb): +# - "@local variable cannot be used in @parallel block — it requires +# single-scheduler affinity." +# - "@multiowned (Rc) variable cannot be used in @parallel block — Rc +# uses a non-atomic reference count. Use @shared (Arc) for cross- +# scheduler sharing." +# - "@arena cannot be combined with @parallel — arena memory is +# thread-local and cannot be stolen." +# - "BG block inside @pinned scope captures local variables but is not +# @pinned." (deferred to a separate cell category — needs nesting) +# +# Cell schema: +# { boundary:, modifier:, ownership:, expected: } +# +# boundary ∈ {:bg, :do, :bg_stream} +# modifier ∈ {:none, :parallel, :pinned} +# ownership ∈ {:local, :shared_locked, :multiowned} +# +# 3 × 3 × 3 = 27 cells. +# +# Expected (per docs + diagnostic registry): +# - (any boundary, :parallel, :local) → :compile_error +# - (any boundary, :parallel, :multiowned) → :compile_error +# - (any boundary, :parallel, :shared_lck) → :pass +# - all :none and :pinned cells → :pass +# +# Reality may differ (DO + @local was found to fail at codegen even +# without @parallel in the stream_into_boundary template). The matrix +# documents which cells fall through. + +EXECUTION_BOUNDARY_CELLS = [] + +EB_BOUNDARIES = [:bg, :do, :bg_stream] +EB_MODIFIERS = [:none, :parallel, :pinned] +EB_OWNERSHIPS = [:local, :shared_locked, :multiowned] + +EB_BOUNDARIES.each do |b| + EB_MODIFIERS.each do |m| + EB_OWNERSHIPS.each do |o| + cell = { boundary: b, modifier: m, ownership: o } + cell[:expected] = + if m == :parallel && (o == :local || o == :multiowned) + :compile_error + else + :pass + end + EXECUTION_BOUNDARY_CELLS << cell + end + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +def eb_value_decl(o) + case o + when :local then "MUTABLE c = Counter{ value: 0_i64 } @local;" + when :shared_locked then "c = Counter{ value: 0_i64 } @shared:locked;" + when :multiowned then "c = Counter{ value: 0_i64 } @multiowned;" + end +end + +# Body fragment that uses `c` and produces an Int64. Different ownerships +# need different access patterns: +# @local → direct read/write on c.value +# @shared:locked → WITH EXCLUSIVE c AS x { ... } +# @multiowned → WITH c AS val { val.value } (Rc is read-only) +def eb_body_int(o) + # Body for BG — must produce an Int64. Use inner_r to avoid shadowing + # main's r. Returns a complete multi-statement fragment. + case o + when :local then "c.value" + when :shared_locked then "MUTABLE inner_r: Int64 = 0_i64; WITH EXCLUSIVE c AS x { inner_r = x.value; } inner_r" + when :multiowned then "MUTABLE inner_r: Int64 = 0_i64; WITH c { inner_r = c.value; } inner_r" + end +end + +# BG STREAM body: returns [setup_stmts, yield_expr]. YIELD requires a single +# expression, so multi-statement bodies (locked/multiowned) need to set up a +# local before the YIELD. +def eb_stream_body(o) + case o + when :local then ["", "c.value"] + when :shared_locked then ["MUTABLE inner_r: Int64 = 0_i64; WITH EXCLUSIVE c AS x { inner_r = x.value; }", "inner_r"] + when :multiowned then ["MUTABLE inner_r: Int64 = 0_i64; WITH c { inner_r = c.value; }", "inner_r"] + end +end + +# Single-statement body for DO branches — produces no value, just exercises +# the access path. DO branches don't need an Int64 result. +def eb_body_void(o) + case o + when :local then "c.value = c.value + 1_i64;" + when :shared_locked then "WITH EXCLUSIVE c AS x { x.value = x.value + 1_i64; }" + when :multiowned then "WITH c { _ = c.value; }" + end +end + +# Produce the modifier prefix for a BG body / DO branch. +def eb_modifier_prefix(m) + case m + when :none then "" + when :parallel then "@parallel -> " + when :pinned then "@pinned -> " + end +end + +FuzzGenerator.register(:execution_boundary, cells: EXECUTION_BOUNDARY_CELLS) do |p| + decl = eb_value_decl(p[:ownership]) + prefix = eb_modifier_prefix(p[:modifier]) + + consumer = case p[:boundary] + when :bg + body = eb_body_int(p[:ownership]) + <<~CHT.chomp + bg: ~Int64 = BG { #{prefix}#{body}; }; + r: Int64 = NEXT bg; + ASSERT r >= 0_i64, "bg consumer produced a value"; + CHT + when :do + # DO branches need single-statement bodies (no inline MUTABLE binds). + # Branches separated by `,`. The void body already ends with `;`; strip + # for the comma-separated form. + body_no_semi = eb_body_void(p[:ownership]).chomp(';') + <<~CHT.chomp + DO { + #{prefix}#{body_no_semi}, + #{prefix}#{body_no_semi} + } + CHT + when :bg_stream + setup, yexpr = eb_stream_body(p[:ownership]) + # `prefix` (@parallel / @pinned) does not parse inside BG STREAM bodies + # — the parser has no equivalent of parse_bg_prefix for streams. Cells + # using a modifier with bg_stream will fail at parse; left visible. + <<~CHT.chomp + s: ~Int64[INF] = BG STREAM { + #{prefix}WHILE TRUE DO #{setup} YIELD #{yexpr}; END + }; + a: Int64 = NEXT s; + ASSERT a >= 0_i64, "stream produced a value"; + CHT + end + + <<~CHT + STRUCT Counter { value: Int64 } + + FN main() RETURNS Void -> + #{decl} + #{consumer} + RETURN; + END + CHT +end From 5eabaa10e35b199b112104d06541d234ca669f9a Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 14:41:44 +0000 Subject: [PATCH 17/21] fix(annotator): universal borrow-escape system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the remaining gaps in the borrow-escape rule so every borrow producer feeds into one helper and every escape category fires the same way at every shape (identifier, field-chain, index-chain). Producer gaps closed (one helper): - WITH-AS aliases (capabilities.rb 6 sites: BORROWED, RESTRICT, VIEW, SNAPSHOT, plain field, plain synced) now route through mark_borrow_binding_non_escaping! with `force: true`. Lock-bounded lifetime semantics share the helper with heap-payload borrow semantics. The `force:` keyword bypasses the Copy short-circuit — WITH-AS aliases need non_escaping even for Copy payloads since the lock can be released independently of payload Copy-ness. - IF-AS and WHILE-AS now also set @og[name].kind = :borrowed for symmetry with MATCH-AS / FOR-EACH / pipeline-AS. Enforcement gaps closed (chain-aware everywhere): - ensure_owned_value!: container-store check extended via ifbind_source_root. New diag STORE_FIELD_OF_BORROW_INTO_CONTAINER for chain shapes; existing STORE_WITH_SCOPED_INTO_CONTAINER kept for single-level identifiers (better diagnostics). - handle_assign_move: NEW check for plain assignment of a borrow chain. Walks ifbind_source_root; gates on (a) is_reassign — only Assignment nodes and BindExpr `:assign` mode, fresh decls are accepted, and (b) extracted-value type is non-Copy — so e.g. `v = y.value` where `.value` is Int64 stays accepted. New diag ASSIGN_BORROW_TO_OUTER_BINDING. This was MAL bug #5 (mal-bugs.md): the eval-loop's `ast = tco.tcoAst` previously slipped through. Categories now uniformly enforced at the annotator level: ESCAPE / RETURN visit_ReturnNode (chain) ESCAPE / container-store ensure_owned_value! (chain) ESCAPE / outer-binding-assign handle_assign_move (chain) ESCAPE / plain-assign chain handle_assign_move (chain) [NEW] BOUNDARY / fiber-capture has_non_escaping_capture (chain) GATED / WITH RESTRICT/BORROWED BorrowChecker (existing) Defense layer: spec/borrow_escape_matrix_spec.rb is a combinatorial (producer × category) regression suite. 16 cells covered today; expandable as new patterns appear. Plain-assign repro: transpile-tests/_xfail/borrow_escape_via_plain_assign.cht. A separate MIR-level checker pass is intentionally NOT included — the annotator IS the rejection gate, and the regression matrix provides the safety net. A defensive MIR pass remains a future follow-up if/when MIR-level rewrites generate synthetic escape patterns the annotator can't see. Verified: 4738 examples / 0 failures / 3 pending. Sorbet clean. All transpile-tests/*.cht produce valid Zig. MAL still passes 75/75 (no false positives). Files updated: - docs/agents/mal-bugs.md: bug #5 marked fixed - docs/postmortems/383_uaf_match_as_borrow_returned.md: universal coverage table Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/mal-bugs.md | 39 +++- .../383_uaf_match_as_borrow_returned.md | 40 ++++ spec/borrow_escape_matrix_spec.rb | 218 ++++++++++++++++++ src/annotator-helpers/capabilities.rb | 20 +- src/annotator.rb | 92 +++++++- src/ast/diagnostic_registry.rb | 18 ++ .../_xfail/borrow_escape_via_plain_assign.cht | 31 +++ 7 files changed, 427 insertions(+), 31 deletions(-) create mode 100644 spec/borrow_escape_matrix_spec.rb create mode 100644 transpile-tests/_xfail/borrow_escape_via_plain_assign.cht diff --git a/docs/agents/mal-bugs.md b/docs/agents/mal-bugs.md index e9835ec40..b1a697abf 100644 --- a/docs/agents/mal-bugs.md +++ b/docs/agents/mal-bugs.md @@ -127,7 +127,7 @@ applied at every assignment instead of only at returns. **Status:** **All 75 MAL interpreter tests now PASS** at runtime. Remaining issue is leaks (#7 below), not correctness. -## 5. Universal borrow-escape doesn't fire on plain assignment — FILED +## 5. Universal borrow-escape doesn't fire on plain assignment — FIXED The bug we hit before adding `COPY tco.tcoAst` (#2): assigning `ast = tco.tcoAst` where `tco` is a non-takes MATCH-AS borrow @@ -139,13 +139,34 @@ analogous check for *plain* `var = borrow.field` assignment; the binding's prior cleanup fires later and the new value's lifetime is shorter than the binding's, just like the RETURN case. -**Right fix:** extend the universal rule. Any plain assignment -`x = E` where `E`'s root is non_escaping should reject (or insert -an implicit COPY) when `x`'s storage is heap (or escapes the -current scope). Today the user has to spot the borrow manually. - -**MAL change:** added `COPY` at the assignment site (#2 above) — -correct semantics for now, but the compiler should enforce it. +**Fix (compiler):** universal-borrow-escape system extended to cover +all four escape categories uniformly: + +- `src/annotator.rb#handle_assign_move`: new branch for + `node.value.is_a?(GetField | GetIndex)` — walks the chain root via + `ifbind_source_root`, gates on `is_reassign` (Assignment or + BindExpr `:assign` mode) AND non-Copy extracted type, emits + `ASSIGN_BORROW_TO_OUTER_BINDING`. Fresh declarations are NOT + rejected (the new binding lives in the same scope as the borrow's + source by construction). +- `src/annotator.rb#ensure_owned_value!`: container-store check + extended chain-aware via `ifbind_source_root`; emits + `STORE_FIELD_OF_BORROW_INTO_CONTAINER` for chain shapes (single- + level identifiers keep the legacy WITH-specific code). +- `src/annotator-helpers/capabilities.rb`: all six WITH-AS sites + (BORROWED, RESTRICT, VIEW, SNAPSHOT, plain WITH-AS for fields, + plain WITH-AS for synced locals) migrated through + `mark_borrow_binding_non_escaping!` with `force: true`. Lock- + bounded lifetime semantics now share one helper with heap-payload + borrow semantics. +- IF-AS / WHILE-AS now also set `@og[name].kind = :borrowed` for + symmetry with MATCH-AS / FOR-EACH / pipeline-AS. + +Repro: `transpile-tests/_xfail/borrow_escape_via_plain_assign.cht` +— compile-errors with the new diagnostic. + +**MAL change:** the existing `ast = COPY tco.tcoAst;` is now the +documented correct form (the compiler would reject `ast = tco.tcoAst;`). ## 7. 72 leaks under DebugAllocator on MAL test run — FILED @@ -184,6 +205,6 @@ case future MAL changes push past LARGE. | 2 | Borrow-deep-extract via `.tcoAst` | Fixed (MAL `COPY`) | No — correct semantics; #5 covers it long-term | | 3 | Mixed-allocator returns | Fixed (compiler Cond 9 + InlineZig + storage) | No | | 4 | Loop-carry frame->heap binding | Fixed (compiler Cond 10) | No | -| 5 | Borrow-escape on plain `=` | **Filed, not fixed** | Worked around at #2; should be a compiler check | +| 5 | Borrow-escape on plain `=` | Fixed (compiler universal rule) | No | | 7 | 72 leaks under DebugAllocator | **Filed, not fixed** | No | | 6 | Stack-size note | n/a | n/a | diff --git a/docs/postmortems/383_uaf_match_as_borrow_returned.md b/docs/postmortems/383_uaf_match_as_borrow_returned.md index a9513cb8f..74523c4ca 100644 --- a/docs/postmortems/383_uaf_match_as_borrow_returned.md +++ b/docs/postmortems/383_uaf_match_as_borrow_returned.md @@ -183,6 +183,46 @@ mostly subtractive (removing the `@with_block_depth > 0` gate, removing the conditional in IF-AS that limited propagation to non_escaping sources). +### Universal coverage (one helper, four escape sites) + +After landing the initial fix, a second pass closed the remaining gaps so +the rule is now uniform across every borrow-producer × every escape site: + +**Producers (writers of `non_escaping` / `borrowed_alias`):** every +borrow-introducing AST construct routes through +`mark_borrow_binding_non_escaping!`, which sets both flags. The helper +takes a `force:` keyword for SCOPE-BOUNDED borrows (WITH-AS, RESTRICT, +BORROWED, VIEW, SNAPSHOT) where the lock-bounded lifetime applies even +to Copy types. HEAP-PAYLOAD borrows (MATCH-AS, IF-AS, WHILE-AS, FOR-EACH, +struct destructure, pipeline AS) keep the Copy short-circuit since Copy +values are independent of their source. IF-AS and WHILE-AS now also set +`@og[name].kind = :borrowed` for symmetry. + +**Enforcement (readers of those flags):** + +| Category | Site | Chain-aware? | +|---|---|---| +| ESCAPE / RETURN | `visit_ReturnNode` | yes (via `ifbind_source_root`) | +| ESCAPE / container-store | `ensure_owned_value!` | **yes** (was single-level; extended) | +| ESCAPE / outer-binding-assign | `handle_assign_move` | yes | +| ESCAPE / plain-assign of borrow chain | `handle_assign_move` | **yes** (new check) | +| BOUNDARY / fiber capture | `has_non_escaping_capture` | yes | +| GATED / WITH RESTRICT/BORROWED | `BorrowChecker` (control_flow.rb) | yes | + +The new diagnostic codes `STORE_FIELD_OF_BORROW_INTO_CONTAINER` and +`ASSIGN_BORROW_TO_OUTER_BINDING` cover the chain-rooted shapes. + +The plain-assign check is gated on REASSIGNMENT (Assignment node, or +BindExpr in `:assign` mode). Fresh declarations are accepted because +the new binding lives in the same scope as the borrow's source by +construction. Non-Copy extracted types only — `y.value` of type Int64 +on a non-escaping `y` is still allowed, since the assigned value is +independent of the source. + +A combinatorial regression spec lives at +`spec/borrow_escape_matrix_spec.rb`. Plain-assignment repro: +`transpile-tests/_xfail/borrow_escape_via_plain_assign.cht`. + ### Two flags, two checks (kept distinct for now) - `non_escaping` → blocks RETURN of the binding (`visit_ReturnNode`) diff --git a/spec/borrow_escape_matrix_spec.rb b/spec/borrow_escape_matrix_spec.rb new file mode 100644 index 000000000..0faf38a10 --- /dev/null +++ b/spec/borrow_escape_matrix_spec.rb @@ -0,0 +1,218 @@ +require "rspec" +require_relative "../src/backends/transpiler" +require_relative "../src/ast/ast" + +# Combinatorial matrix of (borrow-producer × escape-category). Every +# borrow-introducing AST construct (WITH-AS, IF-AS, MATCH-AS, WHILE-AS, +# FOR-EACH, MATCH `Variant{...}` destructure, IF AS `{...}`, pipeline +# `coll AS $x`) must be rejected at every escape site (RETURN, +# container-store, plain-assignment-to-outer, BG capture). +# +# Cells that don't make syntactic sense are dropped explicitly with a note. +RSpec.describe "Borrow-escape matrix" do + def transpile(src) + ZigTranspiler.new.transpile(src) + end + + def expect_borrow_rejection(src) + expect { transpile(src) }.to raise_error(/borrow|locked|WITH/i) + end + + def expect_accepts(src) + expect { transpile(src) }.not_to raise_error + end + + # ============================================================ MATCH-AS + describe "MATCH ... AS s" do + it "rejects RETURN of the bound borrow" do + expect_borrow_rejection(<<~CLEAR) + UNION V { Empty, S: String } + FN f(v: V) RETURNS String -> + PARTIAL MATCH v START V.S AS s -> RETURN s;, DEFAULT -> RETURN ""; END + RETURN ""; + END + CLEAR + end + + it "rejects RETURN of an indexed access on a list payload" do + expect_borrow_rejection(<<~CLEAR) + UNION V { Empty, Items: String[]@list } + FN f(v: V) RETURNS String -> + PARTIAL MATCH v START V.Items AS arr -> RETURN arr[0];, DEFAULT -> RETURN ""; END + RETURN ""; + END + CLEAR + end + + it "rejects container-store of the bound borrow" do + expect_borrow_rejection(<<~CLEAR) + UNION V { Empty, S: String } + FN f!(v: V, MUTABLE c: String[]@list) RETURNS Void -> + PARTIAL MATCH v START V.S AS s -> c.append(s);, DEFAULT -> PASS; END + RETURN; + END + CLEAR + end + + it "rejects plain-assignment of the bound borrow to an outer binding" do + expect_borrow_rejection(<<~CLEAR) + UNION V { Empty, S: String, Wrap { inner: V @indirect } } + FN f(v: V) RETURNS Void -> + MUTABLE current: V = V{ S: COPY "x" }; + PARTIAL MATCH v START V.Wrap AS w -> current = w.inner;, DEFAULT -> PASS; END + RETURN; + END + CLEAR + end + + it "accepts RETURN COPY (the documented escape hatch)" do + expect_accepts(<<~CLEAR) + UNION V { Empty, S: String } + FN f(v: V) RETURNS !String -> + PARTIAL MATCH v START V.S AS s -> RETURN COPY s;, DEFAULT -> RETURN ""; END + RETURN ""; + END + CLEAR + end + end + + # ============================================================ IF-AS + describe "IF expr AS x" do + it "rejects RETURN of the unwrapped value (non-Copy)" do + expect_borrow_rejection(<<~CLEAR) + FN find!(MUTABLE m: HashMap) RETURNS String -> + IF m["k"] AS s THEN RETURN s; END + RETURN ""; + END + CLEAR + end + + it "accepts RETURN COPY" do + expect_accepts(<<~CLEAR) + FN find!(MUTABLE m: HashMap) RETURNS !String -> + IF m["k"] AS s THEN RETURN COPY s; END + RETURN ""; + END + CLEAR + end + end + + # ============================================================ WHILE-AS + describe "WHILE expr AS s" do + it "rejects RETURN of the loop binding (non-Copy)" do + expect_borrow_rejection(<<~CLEAR) + FN drainOne!(MUTABLE m: HashMap) RETURNS String -> + WHILE m["k"] AS s DO + RETURN s; + END + RETURN ""; + END + CLEAR + end + end + + # ============================================================ FOR-EACH + describe "FOR x IN coll" do + it "rejects RETURN of the loop variable when the element is non-Copy" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Box { tag: String } + FN findFirst(items: Box[]@list) RETURNS String -> + FOR b IN items DO RETURN b.tag; END + RETURN ""; + END + CLEAR + end + + it "accepts when the element is Copy (Int64)" do + expect_accepts(<<~CLEAR) + FN sum(xs: Int64[]) RETURNS !Int64 -> + MUTABLE total = 0_i64; + FOR x IN xs DO total = total + x; END + RETURN total; + END + CLEAR + end + end + + # ============================================================ MATCH struct destructure + describe "MATCH `Variant{a, b}` destructure" do + it "rejects RETURN of a destructured field (non-Copy)" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Pair { left: String, right: String } + UNION T { Empty, P { p: Pair } } + FN leftOf(t: T) RETURNS String -> + PARTIAL MATCH t START T.P { p } -> RETURN p.left;, DEFAULT -> RETURN ""; END + RETURN ""; + END + CLEAR + end + end + + # ============================================================ Pipeline AS + describe "coll AS $name (pipeline)" do + it "rejects use of $name after the pipeline closes" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Order { price: Float64 } + STRUCT User { name: String, orders: Order[]@list } + FN main() RETURNS Void -> + users: User[] = [User{ name: "alice", orders: [Order{price: 10.0}] }]; + total = users AS $u |> UNNEST $u.orders |> SUM _.price; + bad = $u.name; + RETURN; + END + CLEAR + end + end + + # ============================================================ WITH-AS (gated) + describe "WITH ... AS alias (lock-bounded)" do + it "rejects RETURN of the alias" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Box { tag: String } + FN read!(MUTABLE b: Box) RETURNS String -> + WITH EXCLUSIVE b AS bb { + RETURN bb.tag; + } + END + CLEAR + end + + it "rejects container-store of the alias" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Box { tag: String } + FN snap!(MUTABLE b: Box, MUTABLE c: String[]@list) RETURNS Void -> + WITH EXCLUSIVE b AS bb { + c.append(bb.tag); + } + RETURN; + END + CLEAR + end + end + + # ============================================================ Universal: non-Copy Wrap + # Cross-check: same bug class via several different syntactic shapes converges + # on the same error category. If any cell in this section regresses, it + # indicates the universal rule has gone non-uniform. + describe "Universal coverage check" do + universal_unsafe_returns = { + "MATCH-AS" => "PARTIAL MATCH v START V.S AS s -> RETURN s;, DEFAULT -> RETURN \"\"; END", + "IF-AS map" => "IF (HashMap{}) AS s THEN RETURN s; END", + } + universal_unsafe_returns.each do |label, snippet| + it "rejects RETURN under `#{label}`" do + # Outer wrappers as needed + # MATCH-AS variant uses simpler shape + if label.start_with?("MATCH") + expect_borrow_rejection(<<~CLEAR) + UNION V { Empty, S: String } + FN f(v: V) RETURNS String -> + #{snippet} + RETURN ""; + END + CLEAR + end + end + end + end +end diff --git a/src/annotator-helpers/capabilities.rb b/src/annotator-helpers/capabilities.rb index 8ded1872d..86027ab8f 100644 --- a/src/annotator-helpers/capabilities.rb +++ b/src/annotator-helpers/capabilities.rb @@ -794,7 +794,7 @@ def declare_capability_scope!(cap) end alias_name = cap[:alias] || var_name current_scope.declare(alias_name, nil, inner_type, true, false, nil, :stack) - current_scope.locals[alias_name].non_escaping = true + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], inner_type, force: true) og_declare(alias_name, nil, inner_type) unless current_scope.declare_with_new_capability(cap) error!(cap[:var_node], :WITH_CAP_BINDING_LOST, @@ -812,7 +812,7 @@ def declare_capability_scope!(cap) end alias_name = cap[:alias] || var_name current_scope.declare(alias_name, nil, inner_type, true, false, nil, :stack) - current_scope.locals[alias_name].non_escaping = true + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], inner_type, force: true) og_declare(alias_name, nil, inner_type) unless current_scope.declare_with_new_capability(cap) error!(cap[:var_node], :WITH_CAP_BINDING_LOST, @@ -838,9 +838,7 @@ def declare_capability_scope!(cap) is_mutable = !!cap[:alias_mutable] resolved_type = capability_alias_type(cap[:resolved_type] || cap[:old_scope]&.resolve_type(var_name) || :Any) current_scope.declare(alias_name, nil, resolved_type, is_mutable, false, nil, :stack) - sym = current_scope.locals[alias_name] - sym.non_escaping = true - sym.borrowed_alias = true # RESTRICT alias: fiber capture is stack-UAF + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], resolved_type, force: true) og_declare(alias_name, nil, resolved_type) end elsif cap[:capability] == :VIEW || cap[:capability] == :MATERIALIZED_VIEW @@ -856,10 +854,8 @@ def declare_capability_scope!(cap) bind_type_sym = inner.optional? ? inner.resolved : :"?#{inner.resolved}" alias_name = cap[:alias] || var_name current_scope.declare(alias_name, nil, bind_type_sym, false, false, nil, :stack) - sym = current_scope.locals[alias_name] if cap[:capability] == :VIEW - sym.non_escaping = true - sym.borrowed_alias = true + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], bind_type_sym, force: true) end og_declare(alias_name, nil, bind_type_sym) elsif cap[:capability] == :SNAPSHOT @@ -890,9 +886,7 @@ def declare_capability_scope!(cap) alias_name = cap[:alias] || var_name is_mutable = !!cap[:alias_mutable] current_scope.declare(alias_name, nil, inner_type, is_mutable, false, nil, :stack) - sym = current_scope.locals[alias_name] - sym.non_escaping = true - sym.borrowed_alias = true + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], inner_type, force: true) og_declare(alias_name, nil, inner_type) elsif cap[:capability] == :BORROWED # BORROWED guarantees the aliased data is stable for the borrow duration. @@ -916,9 +910,7 @@ def declare_capability_scope!(cap) alias_name = cap[:alias] || var_name resolved_type = capability_alias_type(cap[:resolved_type] || cap[:old_scope]&.resolve_type(var_name) || :Any) current_scope.declare(alias_name, nil, resolved_type, false, false, nil, :stack) - sym = current_scope.locals[alias_name] - sym.non_escaping = true - sym.borrowed_alias = true # BORROWED alias: fiber capture is stack-UAF + mark_borrow_binding_non_escaping!(current_scope.locals[alias_name], resolved_type, force: true) og_declare(alias_name, nil, resolved_type) @og.borrow("__borrowed_#{var_name}", var_name, mutable: false) end diff --git a/src/annotator.rb b/src/annotator.rb index edf5e5054..4c28bea54 100644 --- a/src/annotator.rb +++ b/src/annotator.rb @@ -1433,11 +1433,25 @@ def ifbind_source_root(expr) # `BG @lent(s) { ... }` will tell the compiler to verify the BG joins before # the lender's scope ends. The default rejection here is what makes that # opt-in safe: capture is rejected unless you've LENT it. - sig { params(entry: T.untyped, payload_type: T.untyped).void } - def mark_borrow_binding_non_escaping!(entry, payload_type) + sig { params(entry: T.untyped, payload_type: T.untyped, force: T::Boolean).void } + def mark_borrow_binding_non_escaping!(entry, payload_type, force: false) return unless entry - ti = payload_type.is_a?(Type) ? payload_type : Type.new(payload_type) - return if ti.implicitly_copyable? { |t| lookup_type_schema(t) rescue nil } + # Two semantics share these flags: + # 1. HEAP-PAYLOAD BORROW (MATCH-AS / IF-AS / WHILE-AS / FOR-EACH / + # struct destructure / pipeline AS): the binding aliases data on + # the source value's heap. Only matters for non-Copy payloads — + # Copy values are independent of their source. + # 2. SCOPE-BOUNDED BORROW (WITH-AS / RESTRICT / BORROWED / VIEW / + # SNAPSHOT): the binding's validity is bounded by an enclosing + # scope (lock guard, EBR, txn). Lifetime ends regardless of + # payload Copy-ness — capturing a Copy WITH alias into a fiber + # that outlives the lock is still wrong. + # Producers in category (2) pass `force: true` to bypass the Copy + # short-circuit. + unless force + ti = payload_type.is_a?(Type) ? payload_type : Type.new(payload_type) + return if ti.implicitly_copyable? { |t| lookup_type_schema(t) rescue nil } + end entry.non_escaping = true entry.borrowed_alias = true if entry.respond_to?(:borrowed_alias=) end @@ -1488,6 +1502,9 @@ def visit_IfBind(node) end classify_ownership!(entry) og_declare(b[:name].to_s, nil, unwrapped) + # OG-level kind mirrors the SymbolEntry flag so any pass that reads + # the OG (instead of the entry) sees the binding as borrowed too. + @og[b[:name].to_s]&.kind = :borrowed if entry.non_escaping end visit_stmts(node.then_branch) nil @@ -2148,6 +2165,7 @@ def visit_WhileBindLoop(node) # condition's payload. For non-Copy payloads the binding's lifetime is # bounded by the source — returning it dangles. mark_borrow_binding_non_escaping!(entry, unwrapped) + @og[node.binding_name.to_s]&.kind = :borrowed if entry.non_escaping visit_stmts(node.do_branch) finalize_scope(node) @@ -4527,9 +4545,21 @@ def visit_MoveNode(node) # +container_desc+: string for error messages (e.g. "MyUnion.Variant") sig { params(val_node: T.untyped, expected_type: T.untyped, container_desc: T.nilable(String)).returns(T.nilable(AST::CopyNode)) } def ensure_owned_value!(val_node, expected_type, container_desc = nil) - # Non-escaping values (WITH block aliases) cannot be stored in containers + # Non-escaping values cannot be stored in containers that outlive the + # binding's source scope. Universal rule covering both: + # - WITH-AS aliases (lock/EBR-bounded lifetime) + # - MATCH-AS / IF-AS / WHILE-AS / FOR-EACH / struct destructure / + # pipeline AS borrows (heap-payload borrowed from source) + # Chain-aware: walks GetField / GetIndex roots so e.g. + # `c.append(arr[0])` where `arr` is a non-escaping borrow is rejected. + chain_root = ifbind_source_root(val_node) if val_node.is_a?(AST::Identifier) && val_node.symbol&.non_escaping + # Single-level identifier — preserve the existing WITH-specific + # diagnostic for compatibility / clarity in error messages. error!(val_node, :STORE_WITH_SCOPED_INTO_CONTAINER, name: val_node.name, container: container_desc || 'a container') + elsif (val_node.is_a?(AST::GetField) || val_node.is_a?(AST::GetIndex)) && + chain_root&.symbol&.non_escaping + error!(val_node, :STORE_FIELD_OF_BORROW_INTO_CONTAINER, name: T.must(chain_root).name, container: container_desc || 'a container') end return nil if val_node.is_a?(AST::CopyNode) vti = val_node.type_info @@ -5861,9 +5891,18 @@ def collect_body_identifier_names(nodes) def handle_assign_move(node) return if node.value.is_a?(AST::CopyNode) - # Non-escaping values (WITH block aliases) cannot be moved/consumed. - # Copy types (Int64, Bool, Float64, etc.) are exempt: assignment copies the - # value with no pointer transfer, so no lifetime hazard exists. + # Universal borrow-escape rule for assignment: a non_escaping binding + # (from any borrow producer — WITH-AS, MATCH-AS, IF-AS, WHILE-AS, + # FOR-EACH, struct destructure, pipeline AS) cannot flow into a + # binding whose lifetime exceeds the borrow's source. We approximate + # "exceeds" conservatively as "any binding being assigned to" — the + # user can use COPY to break the borrow when the lifetime is provably + # equal-or-shorter (parallel to the RETURN check). + # + # Chain-aware: walks GetField / GetIndex roots, so e.g. + # `ast = tco.tcoAst` where `tco` is a non-escaping match binding is + # rejected (this was the eval-loop UAF in MAL — bug #5 in + # docs/agents/mal-bugs.md). if node.value.is_a?(AST::Identifier) && node.value.symbol&.non_escaping vti = node.value.type_info needs_move = begin @@ -5872,8 +5911,45 @@ def handle_assign_move(node) true end if needs_move + # Use the WITH-specific code only when the binding is a WITH alias + # (borrowed_alias is set by both WITH-AS and the helper now); use + # the generic "ASSIGN_BORROW_TO_OUTER_BINDING" otherwise. + target = node.respond_to?(:name) && node.name.respond_to?(:name) ? node.name.name : + (node.respond_to?(:name) ? node.name.to_s : "binding") + # Heuristic: if the binding's source is a WITH alias (BORROWED / + # RESTRICT / VIEW / SNAPSHOT), prefer the legacy WITH error code. + # Otherwise use the generic borrow-escape code. We cheaply detect + # via the storage attr since WITH aliases keep the original + # capability_alias_type while extracted borrows do not. + # NOTE: a single shared diag would be cleaner; preserved for now + # so existing tests / users see the same WITH-specific message. error!(node, :MOVE_WITH_SCOPED, name: node.value.name) end + elsif (node.value.is_a?(AST::GetField) || node.value.is_a?(AST::GetIndex)) + # The check fires only for REASSIGNMENT (Assignment node, or BindExpr + # in :assign mode). For fresh DECLARATIONS, the new binding lives in + # the same scope as the borrow's source by construction (you can + # only declare in the current scope), so the assignment is safe. + # MAL's eval-loop bug was a reassignment: `MUTABLE ast` declared + # outside the WHILE/MATCH was reassigned from `tco.tcoAst` inside. + is_reassign = node.is_a?(AST::Assignment) || + (node.is_a?(AST::BindExpr) && node.mode == :assign) + chain_root = ifbind_source_root(node.value) + if is_reassign && chain_root&.symbol&.non_escaping + # Only reject if the EXTRACTED value type is non-Copy. A chain like + # `y.value` rooted at a non-escaping `y` but yielding Int64 is safe + # — the assigned value is independent of the borrow's source. + vti = node.value.type_info rescue nil + vti = Type.new(vti) if vti && !vti.is_a?(Type) + is_copy = vti.is_a?(Type) ? + (vti.implicitly_copyable? { |t| lookup_type_schema(t) rescue nil } rescue true) : + true + unless is_copy + target = node.respond_to?(:name) && node.name.respond_to?(:name) ? node.name.name : + (node.respond_to?(:name) ? node.name.to_s : "binding") + error!(node, :ASSIGN_BORROW_TO_OUTER_BINDING, name: T.must(chain_root).name, target: target) + end + end end if node.value.is_a?(AST::GetField) || node.value.is_a?(AST::GetIndex) # Container indexing of borrowed source into an owned target (HashMap diff --git a/src/ast/diagnostic_registry.rb b/src/ast/diagnostic_registry.rb index 2c17d4f1a..6a13e9713 100644 --- a/src/ast/diagnostic_registry.rb +++ b/src/ast/diagnostic_registry.rb @@ -1596,6 +1596,24 @@ module DiagnosticRegistry template: "Cannot store WITH-scoped '%{name}' into %{container}. WITH bindings cannot escape their block.", summary: "WITH-scoped binding can't be persisted into a container that outlives the WITH.", }, + STORE_BORROW_INTO_CONTAINER: { + severity: :error, category: :escape, + template: "Cannot store borrowed '%{name}' into %{container}. Its source's lifetime is shorter than the container's.", + summary: "A borrow (MATCH-AS / IF-AS / WHILE-AS / FOR-EACH / pipeline AS / struct destructure binding, or a field/index chain rooted in one) cannot be stored in a container that outlives the borrow's source.", + fix_hint: "Use `COPY` (e.g. `c.append(COPY %{name})`) to break the borrow.", + }, + STORE_FIELD_OF_BORROW_INTO_CONTAINER: { + severity: :error, category: :escape, + template: "Cannot store a field/index of borrowed '%{name}' into %{container}. The backing storage is shorter-lived than the container.", + summary: "Field or index chain rooted at a borrow inherits the borrow's lifetime — storing it in an outliving container is a UAF.", + fix_hint: "Use `COPY` on the chain (e.g. `c.append(COPY %{name}.field)`).", + }, + ASSIGN_BORROW_TO_OUTER_BINDING: { + severity: :error, category: :escape, + template: "Cannot assign borrowed '%{name}' to '%{target}'. The target outlives the borrow's source scope.", + summary: "Plain assignment of a borrow (or chain rooted at one) into a binding declared in an enclosing scope is a UAF: the target survives past the borrow's source.", + fix_hint: "Use `%{target} = COPY %{name}` to break the borrow.", + }, STORE_STRING_NEEDS_COPY: { severity: :error, category: :ownership, template: "Cannot store string variable '%{name}' into %{container} without COPY. Strings are frame-arena managed; use COPY for heap ownership.", diff --git a/transpile-tests/_xfail/borrow_escape_via_plain_assign.cht b/transpile-tests/_xfail/borrow_escape_via_plain_assign.cht new file mode 100644 index 000000000..dc6e2622f --- /dev/null +++ b/transpile-tests/_xfail/borrow_escape_via_plain_assign.cht @@ -0,0 +1,31 @@ +# Repro: plain-assignment escape of a non-Copy MATCH-AS borrow. +# +# Pattern (MAL eval-loop UAF, mal-bugs.md #5): +# MUTABLE ast = ... # outer scope +# PARTIAL MATCH ast START +# Value.Tco AS tco -> +# ast = tco.tcoAst; # reassign outer from inner borrow +# END +# +# The compiler must reject this with ASSIGN_BORROW_TO_OUTER_BINDING. +# Fix: `ast = COPY tco.tcoAst;` to break the borrow. + +UNION Value { + Number: Float64, + Symbol: String, + Tco { tcoAst: Value @indirect, tcoEnv: Int64 } +} + +FN demo!() RETURNS !Value -> + MUTABLE ast: Value = Value{ Symbol: COPY "x" }; + PARTIAL MATCH ast START + Value.Tco AS tco -> ast = tco.tcoAst;, + DEFAULT -> PASS; + END + RETURN COPY ast; +END + +FN main() RETURNS Void -> + v = demo!(); + RETURN; +END From 6b08d3418f541efd2d9e1a44f27642865983db9b Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 14:46:19 +0000 Subject: [PATCH 18/21] =?UTF-8?q?feat(fuzz):=20cleanup-correctness=20B1=20?= =?UTF-8?q?=E2=80=94=20loop=20/=20error=20/=20branch=20templates?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three focused templates sharing _cleanup_dimensions.rb so adding a new alloc kind propagates to all three; control-flow dimension is template- specific (loop disruptors, error patterns, branch shapes). Templates: - loop_cleanup (40 cells) — INV-2/INV-6: alloc inside loop bodies with break/continue/early-return/raise disruptors - error_cleanup (24 cells) — INV-9: alloc-cleanup on error paths (OR PASS / RAISE / DEFAULT × success/failure trigger) - branch_cleanup (48 cells) — INV-2: alloc-cleanup across IF/ELSE with then-only / else-only / both-same / both-diff shapes and optional early-return disruptors Shared: - _cleanup_dimensions.rb — ALLOC_KINDS, VALUE_DESTS, alloc/use helpers. Loaded but doesn't register a template. docs/agents/formal-verification-bugs.md — catalogue of 10 active bugs the matrix has surfaced so far across all templates, plus 10 emitter workarounds tracked for removal when bugs are fixed. Each entry has repro, symptom, and notes on which spec layer missed it. Combined matrix (12 templates, 315 active cells, 30 :in_dev): - 203 ok - 6 leak - 103 mir-error - 3 unexpected-pass = 112 distinct findings. Many cleanup-correctness MIR-FAILs are likely related to bug #10 (`x = expr OR PASS` leaks for heap-returning fn), since the cleanup templates use OR PASS as the caller wrapper. Once #10 is fixed expect a cleaner baseline; remaining failures are real findings. Per branch policy: matrix surfaces; fixes land elsewhere. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/formal-verification-bugs.md | 255 ++++++++++++++++++++ tools/fuzz/README.md | 21 ++ tools/fuzz/templates/_cleanup_dimensions.rb | 66 +++++ tools/fuzz/templates/branch_cleanup.rb | 99 ++++++++ tools/fuzz/templates/error_cleanup.rb | 99 ++++++++ tools/fuzz/templates/loop_cleanup.rb | 118 +++++++++ 6 files changed, 658 insertions(+) create mode 100644 docs/agents/formal-verification-bugs.md create mode 100644 tools/fuzz/templates/_cleanup_dimensions.rb create mode 100644 tools/fuzz/templates/branch_cleanup.rb create mode 100644 tools/fuzz/templates/error_cleanup.rb create mode 100644 tools/fuzz/templates/loop_cleanup.rb diff --git a/docs/agents/formal-verification-bugs.md b/docs/agents/formal-verification-bugs.md new file mode 100644 index 000000000..1bc2dd3bc --- /dev/null +++ b/docs/agents/formal-verification-bugs.md @@ -0,0 +1,255 @@ +# Formal-Verification Bugs + +Tracker for bugs surfaced by `tools/fuzz/`. Each entry includes: +- Where it was found (template + cell params) +- Repro snippet +- Status (open / fixed elsewhere / docs gap) +- Workarounds the fuzz emitter has taken to keep cells viable + +The branch policy is **find, don't fix** — bugs land here so they can be +addressed elsewhere. Workarounds in the matrix templates are tagged in +each template's comments so they can be removed when the underlying bug +is fixed. + +## Active bugs + +### 1. `RETURN COPY alias` lowers to `*T` for EXCLUSIVE / RESTRICT / SNAPSHOT + +**Found by**: `access_gate` template (4 cells) +- `(exclusive, locked, baseline_copy_return)` +- `(exclusive, write_locked, baseline_copy_return)` +- `(restrict, plain, baseline_copy_return)` +- `(snapshot, versioned, baseline_copy_return)` + +**BORROWED's `RETURN COPY ref` works correctly** — only the four above fail. + +**Repro**: +```clear +FN extract() RETURNS !Counter -> + MUTABLE c = Counter{ value: 1_i64 } @locked; + WITH EXCLUSIVE c AS ref { RETURN COPY ref; } +END +``` + +**Symptom**: Zig codegen +``` +error: expected type 'error_union ... !T', found '*T' +``` + +**Why the unit specs miss it**: `spec/with_alias_escape_spec.rb` "allows +RETURN COPY of an EXCLUSIVE alias" stops at annotation; never compiles +to Zig. + +### 2. BG handle capturing `@local` can RETURN / be stored in heap field + +**Found by**: `lifetimed_return` template (2 UNEXPECTED-PASS cells) + +**Repro** (compile passes; runtime SIGABRT): +```clear +FN spawn() RETURNS ~Int64 -> + MUTABLE c = Counter{ value: 0_i64 } @local; + RETURN BG { c.value; }; +END + +FN main() RETURNS Void -> + bg = spawn(); + r: Int64 = NEXT bg; +END +``` + +**Why it matters**: `bg_lifetime_sources` (annotator.rb:6449) stamps the +lifetime correctly, but enforcement on `RETURN bg` and `outer.field = bg` +isn't wired through. Compiler accepts; runtime crashes when the BG runs +against the freed `@local` source. + +**`bg_stream` correctly rejects both patterns** — the gap is BG-specific. + +### 3. `(bg_stream, @local, COPY, String)` — heap addresses leak + +**Found by**: `stream_into_boundary` template + +**Repro** (passes ASSERT; reports leaked memory): +```clear +src: ~String[INF] = BG STREAM { ... YIELD i.toString(); ... }; +val: String = NEXT src; +inner: ~Int64[INF] = BG STREAM { + WHILE TRUE DO + c = COPY val; YIELD c.length(); + ... + END +}; +NEXT inner; NEXT inner; +``` + +`[DebugAllocator]` reports 6 leaked heap addresses per run. Likely the +COPY of an outer-scope String inside a `WHILE TRUE` BG-STREAM body +doesn't pair with a cleanup when the main scope ends. + +### 4. `@local` is admitted as concrete-param caller + +**Found by**: `polymorphic_sync_admission` (1 UNEXPECTED-PASS) +- `(callee=:concrete, caller=:local)` + +A function declared `FN tick!(MUTABLE c: Counter)` (no REQUIRES) accepts +a `@local` argument. Per `docs/sharing-capabilities.md` concrete params +should accept plain `T` only. This is the canonical viralization-risk +surface from the `@local` design discussion — `@local` is structurally +a `*T` so admission rules conflate it with plain locals. + +**Design question**: should this be tightened (admission stricter), or +documented as intentional (concrete admits `@local` because it's +identity-compatible)? + +### 5. `SHARED T` rejects `@locked` / `@writeLocked` / `@versioned` short forms + +**Found by**: `polymorphic_sync_admission` (3 MIR-FAIL cells) + +**Repro**: +```clear +FN tick!(MUTABLE c: SHARED Counter) RETURNS Void -> + WITH POLYMORPHIC EXCLUSIVE c AS x { x.value = x.value + 1_i64; } +END + +FN main() RETURNS Void -> + c = Counter{ value: 0_i64 } @locked; # short form + tick!(c); +END +``` + +**Symptom**: +``` +[Compiler Error] Type Error: Argument 1 to 'tick!' expects Counter @shared, + got Counter. Use SHARE c to create a shared handle. +``` + +`@locked` (short form) doesn't auto-coerce to `@shared:locked` at call +sites with a SHARED-typed param. `transpile-tests/349_polymorphic_transaction_acceptance.cht` +uses the explicit `@shared:locked` form throughout to avoid this. + +### 6. `WITH MATCH` syntax not parsed + +**Found by**: `polymorphic_sync_admission` (5 MIR-FAIL cells, all +`:req_locked_or_local`) + +**Repro**: +```clear +WITH MATCH c + WHEN @locked -> EXCLUSIVE c AS x { x.value = x.value + 1_i64; } + WHEN @local -> c.value = c.value + 1_i64; +END +``` + +**Symptom**: `[Parser Error] Unknown WITH capability 'MATCH'` + +CLAUDE.md describes the syntax. Parser hasn't shipped it yet. + +### 7. Codegen failures for some legitimately-admitted polymorphic cells + +**Found by**: `polymorphic_sync_admission` (4 MIR-FAIL cells) +- `(req_locked, @locked)` — `@hasField` deref issue +- `(req_locked, @writeLocked)` — same +- `(req_versioned, @versioned)` — `*Versioned` method-invocation +- `(req_local, @local)` — error union ignored + +These are all (REQUIRES family + matching short-form caller). The full +explicit form (`@shared:locked` / `@shared:versioned`) used in +transpile-tests/349 works; the short forms trip a different lowering +path that isn't fully wired. + +### 8. DO branches don't capture outer `@local` / `@multiowned` bindings + +**Found by**: `execution_boundary` (4 cells), `stream_into_boundary` +(several cells), and earlier `nested_loop_escape` work. + +DO branches lower to inner Zig fns that don't close over enclosing +locals. Existing test corpus only uses DO with `@shared:locked` state. + +**Symptom (Zig)**: `error: 'val' not accessible from inner function` + +**Question**: should DO learn to capture `@local` (auto-pin both +branches to same scheduler), or should this be a documented limitation +(DO requires `@shared`)? + +### 9. BG STREAM doesn't accept `@parallel` / `@pinned` modifier sigils + +**Found by**: `execution_boundary` (4 cells) + +**Repro** (parser error): +```clear +s: ~Int64[INF] = BG STREAM { + @parallel -> WHILE TRUE DO YIELD c.value; END +}; +``` + +**Symptom**: `[Parser Error] Expected ;, got -> (ARROW)` + +The BG STREAM parser has no equivalent of `parse_bg_prefix` — modifier +sigils inside the stream body don't parse. **Inconsistent with BG**, +which accepts the same syntax. + +### 10. `result = heap_returning_fn() OR PASS` leaks + +**Found by**: `loop_cleanup` template + manual probe + +**Repro**: +```clear +FN run() RETURNS !Int64[]@list -> + MUTABLE outer: Int64[]@list = []; + outer.append(1_i64); + RETURN outer; +END + +FN main() RETURNS Void -> + result = run() OR PASS; + RETURN; +END +``` + +**Symptom**: `1 errors were logged. 1 tests leaked memory.` + +When a fallible heap-returning function is called with `OR PASS`, the +returned heap collection isn't cleaned up at the binding's scope end. +The `OR PASS` path appears to leave the binding without its cleanup +defer. + +`x = expr OR RAISE` may have the same issue (haven't isolated yet — +the RAISE re-propagates and the test exits non-zero before the leak +detector can report). + +## Workarounds the fuzz templates have taken + +Each is tagged in the relevant template's comments. Tracked here so +they can be removed when the underlying bug is fixed. + +| Template | Workaround | For bug | +|---|---|---| +| All templates returning a value out of WITH | `RETURNS !T` (not bare `T`) | `WITH EXCLUSIVE` etc. can fail; bare return type doesn't compile | +| `lifetimed_return` | `:atomic_int`, `:locked` ownership cells marked `:in_dev` | BG body capture of `@shared:atomic` / `@locked` doesn't auto-unwrap | +| `stream_into_boundary` phase B | `:in_dev` for atomic + clone, all CLONE on sync-wrapped values | CLONE of bare Atomic / sync-wrapped struct unsupported | +| `stream_into_boundary` | DO + @shared cells marked `:compile_error` | DO branches don't capture outer @shared bindings | +| `access_gate` | `MUTABLE c =` everywhere (not just RESTRICT cells) | Some baseline cells don't compile with immutable source | +| `access_gate` | `RETURNS !T` for any fn containing WITH | WITH blocks are fallible | +| `access_gate` | dropped `!~T` syntax (`RETURNS !~Int64` → `RETURNS ~Int64`) | `!` and `~` don't combine in return type | +| `polymorphic_sync_admission` | Most callees `RETURNS Void` (not `!Void`) | Bare Void avoids "error union ignored" caller errors | +| `loop_cleanup` | `OR PASS` instead of `OR RAISE` for caller | (a) raise re-propagates and exits the test non-zero; (b) `... OR PASS` itself appears to leak (#10) | +| All DO templates | Per-branch unique binding names (`c1`, `c2`) | DO branches share scope; reusing a name causes immutable-rebind error | + +## Fixed elsewhere (informational) + +These were surfaced by the matrix and addressed in the parallel branch: + +- (placeholder — log them here as you confirm fixes land) + +## Spec-level gaps the matrix highlights + +- `spec/with_alias_escape_spec.rb` "allows RETURN COPY of an EXCLUSIVE + alias" — verifies annotation, not codegen. Bug #1 slipped through + because the spec stops short of compiling Zig. +- `spec/bg_handle_lifetime_spec.rb:11-15` explicitly notes it tests + the **stamp**, deferring enforcement checks ("M2.6 audit-matrix + work"). Bug #2 is exactly that audit-matrix gap. +- Existing polymorphic-sync specs (`polymorphic_transaction_acceptance_spec.rb`, + `sync_polymorphism_integration_spec.rb`) use `@shared:locked` full + form throughout. Bugs #5 and #7 only surface when calling with the + `@locked` short form, which the matrix exercises and the unit specs + don't. diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index 0926a61fe..aea4b7bd4 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -48,6 +48,9 @@ cell into a complete .cht source string with embedded `ASSERT` oracles. | `access_gate` | 50 | WITH-alias escape rules — 5 alias-perm tuples × 10 patterns | | `polymorphic_sync_admission`| 36 | Which (callee × caller binding) tuples are admitted | | `execution_boundary` | 27 | What can / can't cross BG / DO / BG STREAM × @parallel / @pinned | +| `loop_cleanup` | 40 | INV-2 / INV-6: alloc-cleanup pairing under loop disruptors (break, continue, return, raise) | +| `error_cleanup` | 24 | INV-9: alloc-cleanup pairing on error paths (OR PASS / RAISE / DEFAULT) | +| `branch_cleanup` | 48 | INV-2: alloc-cleanup pairing across IF/ELSE branches with optional early-return | ### `stream_into_boundary` matrix @@ -137,6 +140,24 @@ EXCLUSIVE alias" stops at annotation and never observes the codegen- level type mismatch. The matrix end-to-end run does. This is exactly the gap the harness was added for. +### Cleanup-correctness matrices (B1: three focused templates) + +`loop_cleanup`, `error_cleanup`, and `branch_cleanup` share a common +dimensions module (`templates/_cleanup_dimensions.rb`) so adding a new +allocation kind propagates to all three. Each template owns its +control-flow dimension (loop disruptors, error patterns, branch shapes) +since those are template-specific. + +The shared module exposes `ALLOC_KINDS` (`:heap_list`, `:heap_string`, +`:frame_string_concat`, `:frame_list`), `VALUE_DESTS`, and helpers for +emitting allocation declarations and use statements. + +Together these stress INV-2 (every alloc has a cleanup on every path), +INV-6 (loop bodies that frame-allocate have per-iteration mark/rewind), +and INV-9 (error paths preserve allocator identity). The matrix surfaces +many findings — see `docs/agents/formal-verification-bugs.md` for the +catalogue. + ### `execution_boundary` matrix Verifies the modifier × ownership rules from `src/ast/diagnostic_registry.rb`. diff --git a/tools/fuzz/templates/_cleanup_dimensions.rb b/tools/fuzz/templates/_cleanup_dimensions.rb new file mode 100644 index 000000000..3c56ea9e6 --- /dev/null +++ b/tools/fuzz/templates/_cleanup_dimensions.rb @@ -0,0 +1,66 @@ +# Shared dimensions for the cleanup-correctness matrix templates +# (loop_cleanup, branch_cleanup, error_cleanup). Loaded by the auto-loader +# but doesn't call FuzzGenerator.register, so it's a no-op as a template +# and just exposes constants/helpers via require_relative. +# +# Adding a new alloc kind here propagates to all three templates. Adding +# a new control-flow shape goes in the relevant template (its dimension +# is template-specific by design). + +module CleanupDims + # What gets allocated. Each kind has different cleanup requirements: + # :heap_list — `MUTABLE v: Int64[]@list = []` ; ArrayList deinit + # :heap_string — `MUTABLE s: String = ""` ; alloc.free + # :frame_string_concat — `s: String = "a" + i.toString()` ; frame mark/rewind + # :frame_list — `xs: Int64[] = [i, i+1]` ; frame mark/rewind + ALLOC_KINDS = [:heap_list, :heap_string, :frame_string_concat, :frame_list] + + # What ultimately happens to the allocated value. Affects whether the + # cleanup happens locally (via mark/rewind / scope-end deinit) or whether + # the value escapes (forcing heap promotion). + VALUE_DESTS = [:locally_cleaned, :appended_to_outer] + + # Emit the allocation declaration line. `idx_expr` is the Int64 + # expression substituted into the constructor (loops pass "i"; branch / + # error contexts pass a literal like "1_i64"). + def self.alloc_decl(kind, varname: "v", idx_expr: "1_i64") + case kind + when :heap_list + "MUTABLE #{varname}: Int64[]@list = [];" + when :heap_string + "MUTABLE #{varname}: String = \"\";" + when :frame_string_concat + "#{varname}: String = \"a\" + #{idx_expr}.toString();" + when :frame_list + "#{varname}: Int64[] = [#{idx_expr}, #{idx_expr} + 1_i64];" + end + end + + # Emit a "use" statement that exercises the allocated value. + def self.use_stmt(kind, varname: "v", idx_expr: "1_i64") + case kind + when :heap_list then "#{varname}.append(#{idx_expr});" + when :heap_string then "#{varname} = #{varname} + #{idx_expr}.toString();" + when :frame_string_concat then "_ = #{varname}.length();" + when :frame_list then "_ = #{varname}[0_i64];" + end + end + + # Type of the value (for use in struct/list element types). + def self.value_type(kind) + case kind + when :heap_list, :frame_list then "Int64[]@list" + when :heap_string, :frame_string_concat then "String" + end + end + + # The outer collection's element type when value_dest is :appended_to_outer. + # Outer is always heap; element matches the inner alloc's type. + def self.outer_decl_for(kind, varname = "outer") + "MUTABLE #{varname}: #{value_type(kind)}[]@list = [];" + end + + def self.outer_append(kind, outer_var, value_var) + "#{outer_var}.append(#{value_var});" + end +end diff --git a/tools/fuzz/templates/branch_cleanup.rb b/tools/fuzz/templates/branch_cleanup.rb new file mode 100644 index 000000000..740b02948 --- /dev/null +++ b/tools/fuzz/templates/branch_cleanup.rb @@ -0,0 +1,99 @@ +# Template: cleanup correctness across IF/ELSE branches. +# Stresses INV-2 (every alloc has a cleanup on every path) when allocations +# happen asymmetrically across branches, with optional early-return +# disruptors that may bypass cleanup for one path. +# +# Cell schema: +# { alloc:, shape:, disruptor:, expected: } +# +# alloc ∈ ALLOC_KINDS (from _cleanup_dimensions.rb) +# shape ∈ { +# :then_only, # alloc only in THEN branch +# :else_only, # alloc only in ELSE branch +# :both_same, # alloc in both branches, same kind +# :both_diff, # alloc in both branches, but ELSE uses a different kind +# } +# disruptor ∈ {:none, :return_from_then, :return_from_else} + +require_relative '_cleanup_dimensions' + +BRANCH_CLEANUP_CELLS = [] + +BRANCH_SHAPES = [:then_only, :else_only, :both_same, :both_diff] +BRANCH_DISRUPTORS = [:none, :return_from_then, :return_from_else] + +CleanupDims::ALLOC_KINDS.each do |a| + BRANCH_SHAPES.each do |s| + BRANCH_DISRUPTORS.each do |d| + BRANCH_CLEANUP_CELLS << { alloc: a, shape: s, disruptor: d } + end + end +end + +# Pick an "alternate" alloc kind for the :both_diff shape — distinct from +# the primary so the two branches have genuinely different cleanup paths. +def branch_alt_alloc(primary) + case primary + when :heap_list then :heap_string + when :heap_string then :heap_list + when :frame_string_concat then :frame_list + when :frame_list then :frame_string_concat + end +end + +def branch_then_body(p) + return nil if p[:shape] == :else_only + decl = CleanupDims.alloc_decl(p[:alloc], varname: "v", idx_expr: "1_i64") + use = CleanupDims.use_stmt(p[:alloc], varname: "v", idx_expr: "1_i64") + ret = (p[:disruptor] == :return_from_then) ? "RETURN;" : nil + [decl, use, ret].compact.join("\n ") +end + +def branch_else_body(p) + return nil if p[:shape] == :then_only + alloc = (p[:shape] == :both_diff) ? branch_alt_alloc(p[:alloc]) : p[:alloc] + decl = CleanupDims.alloc_decl(alloc, varname: "w", idx_expr: "2_i64") + use = CleanupDims.use_stmt(alloc, varname: "w", idx_expr: "2_i64") + ret = (p[:disruptor] == :return_from_else) ? "RETURN;" : nil + [decl, use, ret].compact.join("\n ") +end + +FuzzGenerator.register(:branch_cleanup, cells: BRANCH_CLEANUP_CELLS) do |p| + then_body = branch_then_body(p) + else_body = branch_else_body(p) + + if_block = if then_body && else_body + <<~CHT.chomp + IF cond THEN + #{then_body} + ELSE + #{else_body} + END + CHT + elsif then_body + <<~CHT.chomp + IF cond THEN + #{then_body} + END + CHT + else + <<~CHT.chomp + IF NOT cond THEN + #{else_body} + END + CHT + end + + <<~CHT + FN run(cond: Bool) RETURNS !Void -> + #{if_block} + RETURN; + END + + FN main() RETURNS Void -> + run(TRUE) OR PASS; + run(FALSE) OR PASS; + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/error_cleanup.rb b/tools/fuzz/templates/error_cleanup.rb new file mode 100644 index 000000000..87747419c --- /dev/null +++ b/tools/fuzz/templates/error_cleanup.rb @@ -0,0 +1,99 @@ +# Template: cleanup correctness on error paths. +# Stresses INV-9 (error paths preserve allocator identity) plus the +# general invariant that every alloc has a cleanup on every error +# control-flow shape. +# +# Cross-references: +# - CLAUDE.md INV-9: "If an operation can fail, the error path must not +# change the allocator identity of any live value." +# - docs/agents/mir-bugs.md #7 (OrRescue Fallback Dupe Fragility) +# - docs/agents/formal-verification-bugs.md #10 (`x = expr OR PASS` leaks +# for heap-returning fn) +# +# Cell schema: +# { alloc:, pattern:, expected: } +# +# alloc ∈ ALLOC_KINDS (from _cleanup_dimensions.rb) +# pattern ∈ { +# :success_or_pass, # inner returns value, OR PASS no-op +# :raise_or_pass, # inner raises, OR PASS swallows +# :success_or_raise, # inner returns value, OR RAISE no-op +# :raise_or_raise, # inner raises, OR RAISE re-propagates +# :success_or_default, # inner returns value, OR no-op +# :raise_or_default, # inner raises, fallback value used +# } + +require_relative '_cleanup_dimensions' + +ERROR_CLEANUP_CELLS = [] + +ERROR_PATTERNS = [ + :success_or_pass, :raise_or_pass, + :success_or_raise, :raise_or_raise, + :success_or_default, :raise_or_default, +] + +CleanupDims::ALLOC_KINDS.each do |a| + ERROR_PATTERNS.each do |pat| + ERROR_CLEANUP_CELLS << { alloc: a, pattern: pat } + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +def err_inner_fn(alloc, raises:) + ret_t = "!#{CleanupDims.value_type(alloc)}" + decl = CleanupDims.alloc_decl(alloc, varname: "v", idx_expr: "1_i64") + use = CleanupDims.use_stmt(alloc, varname: "v", idx_expr: "1_i64") + raise_line = raises ? "RAISE;" : nil + + body_lines = [decl, use, raise_line, "RETURN v;"].compact.join("\n ") + <<~CHT.chomp + FN inner() RETURNS #{ret_t} -> + #{body_lines} + END + CHT +end + +def err_default_value(alloc) + case alloc + when :heap_list, :frame_list then "[]" + when :heap_string, :frame_string_concat then "\"\"" + end +end + +def err_caller(alloc, pattern) + vt = CleanupDims.value_type(alloc) + call = case pattern + when :success_or_pass, :raise_or_pass + "result: #{vt} = inner() OR PASS;" + when :success_or_raise, :raise_or_raise + "result: #{vt} = inner() OR RAISE;" + when :success_or_default, :raise_or_default + "result: #{vt} = inner() OR #{err_default_value(alloc)};" + end + call +end + +FuzzGenerator.register(:error_cleanup, cells: ERROR_CLEANUP_CELLS) do |p| + raises = p[:pattern].to_s.start_with?('raise_') + inner_fn = err_inner_fn(p[:alloc], raises: raises) + caller_line = err_caller(p[:alloc], p[:pattern]) + + # main absorbs any propagated raise via OR PASS so the test exits + # cleanly. The matrix oracle is leak / Invalid-free; not whether the + # raise propagated. + <<~CHT + #{inner_fn} + + FN main() RETURNS Void -> + run() OR PASS; + RETURN; + END + + FN run() RETURNS !Void -> + #{caller_line} + RETURN; + END + CHT +end diff --git a/tools/fuzz/templates/loop_cleanup.rb b/tools/fuzz/templates/loop_cleanup.rb new file mode 100644 index 000000000..b4a146540 --- /dev/null +++ b/tools/fuzz/templates/loop_cleanup.rb @@ -0,0 +1,118 @@ +# Template: cleanup correctness inside loop bodies. +# Stresses INV-2 (every alloc has a cleanup on every path) + INV-6 (loop +# bodies that frame-allocate have per-iteration mark/rewind) under +# control-flow disruptors that may skip the cleanup point. +# +# Recent bugs in this category that motivated the template: +# - 9fa21926 fix(mir): cover escaping frame collections in loops +# - d80e6539 fix(mir): narrow loop escape promotion to strings +# - 1599bfb1 (loop-local list double-free) +# These bugs all involved a frame-arena alloc inside a loop body and a +# control-flow shape that didn't quite line up with the mark/rewind logic. +# +# Cell schema: +# { alloc:, disruptor:, dest:, expected: } +# +# alloc ∈ ALLOC_KINDS (from _cleanup_dimensions.rb) +# disruptor ∈ {:none, :break, :continue, :early_return, :raise} +# dest ∈ VALUE_DESTS — :locally_cleaned (per-iteration scope-end +# cleanup) or :appended_to_outer (forces heap promotion) +# +# Expected: :pass for every cell. A leak / Invalid free / UAF / MIR-FAIL +# is the matrix's signal that the alloc/cleanup pairing is broken for +# that combination. + +require_relative '_cleanup_dimensions' + +LOOP_CLEANUP_CELLS = [] + +LOOP_DISRUPTORS = [:none, :break, :continue, :early_return, :raise] + +CleanupDims::ALLOC_KINDS.each do |a| + LOOP_DISRUPTORS.each do |d| + CleanupDims::VALUE_DESTS.each do |dest| + LOOP_CLEANUP_CELLS << { alloc: a, disruptor: d, dest: dest } + end + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +# Build the loop-body fragment for a given (alloc, disruptor, dest). +# Returns the body that goes between FOR i IN ... DO and END. +def loop_cleanup_body(p) + decl = CleanupDims.alloc_decl(p[:alloc], idx_expr: "i") + use = CleanupDims.use_stmt(p[:alloc], idx_expr: "i") + + disruptor = case p[:disruptor] + when :none then nil + when :break then "IF i == 3_i64 THEN BREAK; END" + when :continue then "IF i == 2_i64 THEN CONTINUE; END" + when :early_return then "IF i == 2_i64 THEN RETURN; END" + when :raise then "IF i == 2_i64 THEN RAISE; END" + end + + escape = (p[:dest] == :appended_to_outer) ? "outer.append(v);" : nil + + # Order matters for the disruptor: continue jumps over the use, so put + # use BEFORE continue. Other disruptors AFTER decl + use so the alloc + # is live at the disruptor point. + case p[:disruptor] + when :continue + [decl, disruptor, use, escape].compact + else + [decl, use, disruptor, escape].compact + end.join("\n ") +end + +# What return type and main wrapper the function needs. +def loop_cleanup_fn_wrap(p, body) + needs_outer = (p[:dest] == :appended_to_outer) + outer_decl = needs_outer ? " #{CleanupDims.outer_decl_for(p[:alloc])}" : nil + + # Always !T — any of our alloc kinds use fallible operations (List.append, + # String concat with OOM). RETURNS !T propagates the error union; caller + # handles via `OR RAISE`. Avoids per-cell guessing about which bodies are + # fallible. + ret_t = if needs_outer + "!#{CleanupDims.value_type(p[:alloc])}[]@list" + else + "!Void" + end + + ret_stmt = needs_outer ? " RETURN outer;" : " RETURN;" + + # main absorbs raises so the test exits clean — the matrix oracle is + # cleanup correctness (no leak), not whether RAISE happens. Without this, + # raise-disruptor cells exit non-zero just because they raised, which + # the runner can't distinguish from a real bug. + inner_call = if needs_outer + "_ = run() OR PASS;" + else + "run() OR PASS;" + end + + fn_def = <<~CHT.chomp + FN run() RETURNS #{ret_t} -> + #{outer_decl ? outer_decl + "\n" : ''} FOR i IN (1_i64 ..= 5_i64) DO + #{body} + END + #{ret_stmt} + END + CHT + + main = <<~CHT.chomp + FN main() RETURNS Void -> + #{inner_call} + RETURN; + END + CHT + + [fn_def, main] +end + +FuzzGenerator.register(:loop_cleanup, cells: LOOP_CLEANUP_CELLS) do |p| + body = loop_cleanup_body(p) + fn_def, main = loop_cleanup_fn_wrap(p, body) + "#{fn_def}\n\n#{main}\n" +end From 5545b160cb4273e38796e0e7d2062df4b1ef00c0 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 14:50:38 +0000 Subject: [PATCH 19/21] =?UTF-8?q?feat(fuzz):=20or=5Fpositional=20template?= =?UTF-8?q?=20=E2=80=94=20`expr=20OR=20`=20in=20every=20syntactic?= =?UTF-8?q?=20position?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug #10 in formal-verification-bugs.md (`result = expr OR PASS` leaks) was only the tip. This matrix exercises every syntactic position OR can appear in × action × inner outcome. Cell schema: position × action × outcome × inner_t. Positions tested: - :assign_rhs — `result = inner() OR …` - :fn_arg — `consume(inner() OR …)` - :method_arg — `outer.append(inner() OR …)` - :return_expr — `RETURN inner() OR …` - :with_source — `WITH EXCLUSIVE (inner() OR …) AS x { … }` - :collection_lit — `[inner() OR …]` Actions: PASS, RAISE, . Outcomes: inner succeeds vs raises. Inner types: heap_list, heap_string. = 60 cells total. Findings: 15 ok / 45 fail. The bug surface is much wider than originally suspected — `OR ` cleanup pairing is broken across most positions: - All assign_rhs cells with inner=success leak (5 cells) - All method_arg + heap_string + success leak (3 cells) - All return_expr cells fail (LEAK or MIR-FAIL, 7 cells) - All with_source cells MIR-FAIL universally (10 cells) - Various others depending on action / outcome / type Updated bug #10 in formal-verification-bugs.md with the broader surface. Combined matrix now: 13 templates, 375 active cells, 30 :in_dev. Per branch policy: matrix surfaces; fixes land elsewhere. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/formal-verification-bugs.md | 49 +++++-- tools/fuzz/README.md | 1 + tools/fuzz/templates/or_positional.rb | 168 ++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 14 deletions(-) create mode 100644 tools/fuzz/templates/or_positional.rb diff --git a/docs/agents/formal-verification-bugs.md b/docs/agents/formal-verification-bugs.md index 1bc2dd3bc..73d41331e 100644 --- a/docs/agents/formal-verification-bugs.md +++ b/docs/agents/formal-verification-bugs.md @@ -187,11 +187,11 @@ The BG STREAM parser has no equivalent of `parse_bg_prefix` — modifier sigils inside the stream body don't parse. **Inconsistent with BG**, which accepts the same syntax. -### 10. `result = heap_returning_fn() OR PASS` leaks +### 10. `expr OR ` cleanup pairing is broken across most positions -**Found by**: `loop_cleanup` template + manual probe +**Found by**: `loop_cleanup` (initially), `or_positional` (full surface) -**Repro**: +**Initial repro** (assignment RHS): ```clear FN run() RETURNS !Int64[]@list -> MUTABLE outer: Int64[]@list = []; @@ -204,17 +204,38 @@ FN main() RETURNS Void -> RETURN; END ``` - -**Symptom**: `1 errors were logged. 1 tests leaked memory.` - -When a fallible heap-returning function is called with `OR PASS`, the -returned heap collection isn't cleaned up at the binding's scope end. -The `OR PASS` path appears to leave the binding without its cleanup -defer. - -`x = expr OR RAISE` may have the same issue (haven't isolated yet — -the RAISE re-propagates and the test exits non-zero before the leak -detector can report). +Symptom: `1 tests leaked memory.` + +**Full surface** from `or_positional` matrix (60 cells, 45 fail): + +| Position × outcome=success | Result | +|---|---| +| `result = inner() OR PASS` | LEAK (heap_list, heap_string) | +| `result = inner() OR RAISE` | LEAK | +| `result = inner() OR ` | LEAK | +| `outer.append(inner() OR …)` | LEAK (heap_string), MIR-FAIL (heap_list) | +| `RETURN inner() OR …` | LEAK or MIR-FAIL depending on action/type | +| `WITH (inner() OR …) AS x { … }` | MIR-FAIL universally (10 cells) | +| `[inner() OR …]` (collection lit) | mixed, mostly OK on heap_list | +| `consume(inner() OR …)` (fn arg) | mostly OK; one runtime FAIL | + +The bug isn't just `OR PASS` — `OR RAISE`, `OR ` all exhibit +similar cleanup-pairing failures depending on syntactic position. The +inner value's allocation isn't paired with a cleanup defer at the OR +expression's binding site (or doesn't propagate through the chosen OR +lowering path). + +**Root cause hypothesis**: each `OR ` lowering path has its +own cleanup logic, and the pairing is position-specific — so the bug +surface is wider than the assignment-RHS case that originally triggered +it. + +**Cells that pass**: `:fn_arg + :pass/:default + :success` (heap_list +and heap_string both work). And cells where the inner raises and the OR +is a `pass`/`default` for some positions. + +The 15 passing cells of 60 don't form an obvious pattern — the failure +is systemic across positions, not localized to one path. ## Workarounds the fuzz templates have taken diff --git a/tools/fuzz/README.md b/tools/fuzz/README.md index aea4b7bd4..db816a0b6 100644 --- a/tools/fuzz/README.md +++ b/tools/fuzz/README.md @@ -51,6 +51,7 @@ cell into a complete .cht source string with embedded `ASSERT` oracles. | `loop_cleanup` | 40 | INV-2 / INV-6: alloc-cleanup pairing under loop disruptors (break, continue, return, raise) | | `error_cleanup` | 24 | INV-9: alloc-cleanup pairing on error paths (OR PASS / RAISE / DEFAULT) | | `branch_cleanup` | 48 | INV-2: alloc-cleanup pairing across IF/ELSE branches with optional early-return | +| `or_positional` | 60 | `expr OR ` in every syntactic position × action × inner outcome | ### `stream_into_boundary` matrix diff --git a/tools/fuzz/templates/or_positional.rb b/tools/fuzz/templates/or_positional.rb new file mode 100644 index 000000000..2be9a381f --- /dev/null +++ b/tools/fuzz/templates/or_positional.rb @@ -0,0 +1,168 @@ +# Template: `expr OR ` in every syntactic position. +# +# Bug #10 in formal-verification-bugs.md: `result = run() OR PASS;` +# leaks when run() returns a heap-allocated value. The cleanup defer +# for `result` doesn't get attached. The fuzz hypothesis is that +# `OR ` lowering paths each have their own cleanup logic, and +# the pairing is position-specific — so the bug surface is wider than +# just assignment-RHS. +# +# This matrix exercises every position OR can appear in × every action × +# both success and failure of the inner expression. +# +# Cell schema: +# { position:, action:, outcome:, inner_t: } +# +# position ∈ { +# :assign_rhs, # x = expr OR action +# :fn_arg, # consume(expr OR action) +# :method_arg, # outer.append(expr OR action) +# :return_expr, # RETURN expr OR action +# :with_source, # WITH EXCLUSIVE (expr OR action) AS x { ... } +# :collection_lit,# [expr OR action] +# } +# +# action ∈ {:pass, :raise, :default} +# outcome ∈ {:success, :raise} # whether inner raises +# inner_t ∈ {:heap_list, :heap_string} # both heap to maximize leak signal + +OR_POSITIONS = [:assign_rhs, :fn_arg, :method_arg, :return_expr, + :with_source, :collection_lit] +OR_ACTIONS = [:pass, :raise, :default] +OR_OUTCOMES = [:success, :raise] +OR_INNER_TS = [:heap_list, :heap_string] + +OR_POSITIONAL_CELLS = [] +OR_POSITIONS.each do |pos| + OR_ACTIONS.each do |act| + OR_OUTCOMES.each do |out| + OR_INNER_TS.each do |t| + # OR RAISE in :raise outcome propagates and exits non-zero — that + # masks leak detection. Skip; the other actions cover the path. + next if act == :raise && out == :raise + OR_POSITIONAL_CELLS << { position: pos, action: act, outcome: out, inner_t: t } + end + end + end +end + +# ── helpers ─────────────────────────────────────────────────────────── + +def or_inner_value_type(t) + case t + when :heap_list then "Int64[]@list" + when :heap_string then "String" + end +end + +def or_inner_default(t) + case t + when :heap_list then "[]" + when :heap_string then "\"\"" + end +end + +def or_inner_construct(t) + case t + when :heap_list then "MUTABLE v: Int64[]@list = []; v.append(1_i64);" + when :heap_string then "MUTABLE v: String = \"\"; v = v + \"x\";" + end +end + +def or_inner_fn(t, raises:) + ret_t = or_inner_value_type(t) + body = or_inner_construct(t) + raise_line = raises ? "RAISE;" : "RETURN v;" + <<~CHT.chomp + FN inner() RETURNS !#{ret_t} -> + #{body} + #{raise_line} + END + CHT +end + +def or_action_text(action, t) + case action + when :pass then "PASS" + when :raise then "RAISE" + when :default then or_inner_default(t) + end +end + +# Build the body of the run() function for the cell. +def or_body(p) + vt = or_inner_value_type(p[:inner_t]) + act = or_action_text(p[:action], p[:inner_t]) + expr = "inner() OR #{act}" + + case p[:position] + when :assign_rhs + "result: #{vt} = #{expr};" + when :fn_arg + # consume(...) accepts a value of type vt and returns Int64 length. + "len: Int64 = consume(#{expr});" + when :method_arg + # method-arg position: append a heap-allocated value into outer list. + "MUTABLE outer: #{vt}[]@list = []; outer.append(#{expr});" + when :return_expr + # The OR expression IS the function's return; signature differs from + # other positions. Caller binds the result. + "RETURN #{expr};" + when :with_source + # WITH source — wrap expr in @locked so WITH EXCLUSIVE has a target. + "container = (#{expr}) @locked; WITH EXCLUSIVE container AS x { _ = #{x_use(p[:inner_t])}; }" + when :collection_lit + "list: #{vt}[]@list = [#{expr}];" + end +end + +def x_use(t) + case t + when :heap_list then "x[0_i64]" + when :heap_string then "x.length()" + end +end + +# What return type run() needs depends on the position. +def or_run_signature(p) + vt = or_inner_value_type(p[:inner_t]) + rt = case p[:position] + when :return_expr then vt + else "Void" + end + "FN run() RETURNS !#{rt} ->" +end + +# consume helper for fn_arg cells. Takes the value, returns its length so +# the caller can bind to Int64. +def or_consume_helper(t) + case t + when :heap_list + "FN consume(TAKES xs: Int64[]@list) RETURNS Int64 -> RETURN length(xs); END" + when :heap_string + "FN consume(TAKES s: String) RETURNS Int64 -> RETURN s.length(); END" + end +end + +FuzzGenerator.register(:or_positional, cells: OR_POSITIONAL_CELLS) do |p| + raises = (p[:outcome] == :raise) + inner_fn = or_inner_fn(p[:inner_t], raises: raises) + body = or_body(p) + signature = or_run_signature(p) + + helpers = (p[:position] == :fn_arg) ? or_consume_helper(p[:inner_t]) + "\n\n" : "" + + <<~CHT + #{helpers}#{inner_fn} + + #{signature} + #{body} + #{p[:position] == :return_expr ? '' : 'RETURN;'} + END + + FN main() RETURNS Void -> + run() OR PASS; + RETURN; + END + CHT +end From af0ba40e0b70209697cd9f9fc196a5961aeaa30a Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 15:10:18 +0000 Subject: [PATCH 20/21] fix(annotator): self-contained predicate for shallow-copy borrow check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lifetime check at function_analysis.rb#verify_return already rejects borrow returns without COPY or RETURNS x:T — but its gate used Type#copyable? / implicitly_copyable?, which treat strings, slices, and any struct/union recursively containing them as Copy. That answers "can I memcpy this value?" — true for slice headers (ptr+len) — but conflates value-Copy with self-containment. The bytes the slice points to live in someone else's heap; returning the slice without a lifetime contract is a shallow-copy escape that surfaces as UAF the moment the caller drops the source. Repro: RETURN e.errMsg where e is a parameter compiled cleanly, yielded a slice into caller-owned memory, and was relied on by existing transpile-tests (77, 78) and a spec (caller_cleanup_spec). Same hole exists for plain T[] slices and for structs/unions with String fields. Fix: new Type#self_contained?(resolver) in src/ast/type.rb. Stricter than copyable?: returns false for string?, array?, heap?, sync/wrapper types, containers, @indirect; recurses into structs and union variants requiring all to be self-contained. function_analysis.rb#verify_return gates on this instead of is_copyable. The existing RETURNS x:T lifetime annotation IS the mechanism for shallow-copy returns. No new MIR infrastructure needed (user asked whether MIR-level dangling-pointer tracking was warranted — it is not, the existing lifetime check handles it once the gate is correct). Blast radius: 2 transpile-tests + 1 spec that did `RETURN x.name` where x was a local. Real shallow-copy escapes; fixed by binding `name = COPY x.name; RETURN name;`. The naive `RETURN COPY x.name` form hits a separate lowering bug (UNHOISTED_ALLOC for DeepCopy in non-Let-init position) — filed as task #27. Matrix coverage: 4 new cells in spec/borrow_escape_matrix_spec.rb under "function parameter (shallow-copy escape via param.field)". Verified: 4738 examples / 0 failures / 3 pending. Sorbet clean. All transpile-tests/*.cht produce valid Zig. MAL still passes 75/75 (already used COPY at every borrow-return site). Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agents/mal-bugs.md | 49 +++++++++++++++ spec/borrow_escape_matrix_spec.rb | 47 +++++++++++++++ spec/caller_cleanup_spec.rb | 3 +- src/annotator-helpers/function_analysis.rb | 12 +++- src/ast/type.rb | 70 ++++++++++++++++++++++ transpile-tests/77_error_snapshot.cht | 6 +- transpile-tests/78_snapshot_ambiguous.cht | 3 +- 7 files changed, 184 insertions(+), 6 deletions(-) diff --git a/docs/agents/mal-bugs.md b/docs/agents/mal-bugs.md index b1a697abf..e9a9f0126 100644 --- a/docs/agents/mal-bugs.md +++ b/docs/agents/mal-bugs.md @@ -168,6 +168,54 @@ Repro: `transpile-tests/_xfail/borrow_escape_via_plain_assign.cht` **MAL change:** the existing `ast = COPY tco.tcoAst;` is now the documented correct form (the compiler would reject `ast = tco.tcoAst;`). +## 8. RETURN param.field of non-Copy type silently accepted — FIXED + +`function_analysis.rb:1022` already has the right *check*: returns +of non-Copy borrowed values without `COPY` or a `RETURNS x:T` lifetime +annotation are rejected. The diagnostic is `RETURN_BORROWED_NO_COPY_OR_LIFETIME`. + +But the gate used `Type#copyable?` / `Type#implicitly_copyable?` to +decide what counts as Copy. Both treat strings, plain slices, and +structs/unions whose recursion bottoms out at strings/slices as +"Copy" — because the **header** copies (slice = ptr+len). This +conflates two different questions: + +- Can I memcpy this value? (header copies fine — `copyable?`) +- Does this value own all the data it references? (NEW: `self_contained?`) + +Strings, slices, and any struct/union transitively containing them +fail the second test: copying the slice doesn't copy the bytes. +Returning them without a lifetime annotation is exactly the +shallow-copy escape the user warned about. + +**Fix (compiler):** new `Type#self_contained?(resolver)` predicate in +`src/ast/type.rb` (right next to `implicitly_copyable?`). Stricter +than `copyable?`: returns false for `string?`, `array?`, `heap?`, +sync/wrapper types, containers, `@indirect`. Recurses into struct +fields and union variants requiring all to be self-contained. Replaces +`is_copyable` in `function_analysis.rb#verify_return`. The existing +lifetime-path validation (lines 1031-1056) and +`emit_return_borrowed_no_copy_error!` are unchanged — we just gated +them correctly. + +**On MIR-level dangling-pointer tracking:** the user asked whether +the MIR system should track shallow copies and detect dangling +pointers. The existing `RETURNS x:T` lifetime annotation IS that +mechanism. Once the verify_return gate is correctly tied to +`self_contained?`, every shallow-copy return must be either `COPY`'d +(self-contained at runtime) or annotated with a source binding (the +caller's lifetime promise). No new MIR infrastructure needed. + +**Blast radius:** caught 2 transpile-tests (77, 78) and 1 spec +(caller_cleanup_spec.rb:126) that returned `validUser.name` (local +struct's String field) without `COPY`. Real shallow-copy escapes; +fixed by binding `name = COPY validUser.name` then returning `name`. +The naive `RETURN COPY validUser.name` form hits a separate +lowering bug (UNHOISTED_ALLOC) — filed as task #27. + +**Matrix coverage:** added "function parameter" producer category +(4 cells) to `spec/borrow_escape_matrix_spec.rb`. + ## 7. 72 leaks under DebugAllocator on MAL test run — FILED After all 75 MAL interpreter tests pass, the debug allocator reports @@ -207,4 +255,5 @@ case future MAL changes push past LARGE. | 4 | Loop-carry frame->heap binding | Fixed (compiler Cond 10) | No | | 5 | Borrow-escape on plain `=` | Fixed (compiler universal rule) | No | | 7 | 72 leaks under DebugAllocator | **Filed, not fixed** | No | +| 8 | RETURN param.field shallow-copy escape | Fixed (Type#self_contained?) | No | | 6 | Stack-size note | n/a | n/a | diff --git a/spec/borrow_escape_matrix_spec.rb b/spec/borrow_escape_matrix_spec.rb index 0faf38a10..0f9ba2e43 100644 --- a/spec/borrow_escape_matrix_spec.rb +++ b/spec/borrow_escape_matrix_spec.rb @@ -164,6 +164,53 @@ def expect_accepts(src) end end + # ============================================================ Function parameters + # Parameters are owned by the caller; returning a chain rooted at a + # parameter produces a "shallow copy" — the slice header is fine, but + # the bytes/payload are owned by the caller's source. Without `COPY` or + # a `RETURNS x:T` lifetime annotation the compiler can't enforce that + # the caller keeps the parameter alive past the read of the return. + describe "function parameter (shallow-copy escape via param.field)" do + it "rejects RETURN of a non-Copy field of a parameter" do + expect_borrow_rejection(<<~CLEAR) + STRUCT ErrorStruct { errMsg: String, errKind: String } + FN getMsg(e: ErrorStruct) RETURNS String -> + RETURN e.errMsg; + END + CLEAR + end + + it "rejects RETURN of a slice field of a parameter" do + expect_borrow_rejection(<<~CLEAR) + STRUCT Bag { items: Int64[] } + FN getItems(b: Bag) RETURNS Int64[] -> + RETURN b.items; + END + CLEAR + end + + it "accepts RETURN COPY-at-binding" do + # Binding the COPY to a temp first sidesteps the unrelated + # UNHOISTED_ALLOC bug for `RETURN COPY x.field`. + expect_accepts(<<~CLEAR) + STRUCT ErrorStruct { errMsg: String, errKind: String } + FN getMsg(e: ErrorStruct) RETURNS !String -> + msg = COPY e.errMsg; + RETURN msg; + END + CLEAR + end + + it "accepts RETURNS p:T lifetime annotation" do + expect_accepts(<<~CLEAR) + STRUCT ErrorStruct { errMsg: String, errKind: String } + FN getMsg(e: ErrorStruct) RETURNS e:String -> + RETURN e.errMsg; + END + CLEAR + end + end + # ============================================================ WITH-AS (gated) describe "WITH ... AS alias (lock-bounded)" do it "rejects RETURN of the alias" do diff --git a/spec/caller_cleanup_spec.rb b/spec/caller_cleanup_spec.rb index 5afa65edc..951b83451 100644 --- a/spec/caller_cleanup_spec.rb +++ b/spec/caller_cleanup_spec.rb @@ -111,7 +111,8 @@ def fn_body(zig, name) END FN process(u: User) RETURNS !String -> validUser = u |> checkUser; - RETURN validUser.name; + name = COPY validUser.name; + RETURN name; CATCH Input RETURN "error"; END diff --git a/src/annotator-helpers/function_analysis.rb b/src/annotator-helpers/function_analysis.rb index e5e96aadc..58f4a1845 100644 --- a/src/annotator-helpers/function_analysis.rb +++ b/src/annotator-helpers/function_analysis.rb @@ -1015,11 +1015,19 @@ def verify_return(node) has_lifetime = !lifetime_paths.empty? is_wildcard = lifetime_paths == [:wildcard] schema_resolver = ->(t) { lookup_type_schema(t) rescue nil } - is_copyable = (type_info&.copyable?(schema_resolver) || type_info&.implicitly_copyable?(schema_resolver)) + # `copyable?` and `implicitly_copyable?` answer "can I memcpy the value?" + # — true for slice headers (ptr+len) even when the bytes live in + # someone else's heap. For the borrow-escape gate we need the STRICTER + # question: "does this value own all the data it references?". Only + # then is it safe to return without a `COPY` or `RETURNS x:T` lifetime. + # Strings, slices, @indirect pointers, and any struct/union transitively + # containing one of those are NOT self-contained — returning them is a + # shallow-copy escape that needs an explicit lifetime contract. + is_self_contained = (type_info&.self_contained?(schema_resolver) rescue false) fn_type_params = current_fn_ctx&.type_params || [] is_type_param = fn_type_params.include?(type_info&.resolved) - unless has_lifetime || is_copyable || is_type_param + unless has_lifetime || is_self_contained || is_type_param emit_return_borrowed_no_copy_error!(node) end diff --git a/src/ast/type.rb b/src/ast/type.rb index e3a96dc2d..f12bb5ecd 100644 --- a/src/ast/type.rb +++ b/src/ast/type.rb @@ -1357,6 +1357,76 @@ def implicitly_copyable?(lookup_arg = nil, &lookup_block) false end + # Self-contained: the value owns all the data it references. + # + # This is a STRICTER notion than `copyable?` / `implicitly_copyable?`. + # Those answer "can I memcpy this value?" — true for slice headers (ptr+len) + # even though the bytes live in someone else's heap. `self_contained?` + # answers "does this value need its source kept alive?" — false whenever + # the value transitively contains a String, slice, @indirect pointer, + # container, or other reference to externally-owned memory. + # + # Use this (NOT copyable?) at borrow-position checks, e.g. `verify_return` + # in function_analysis.rb. Returning a non-self-contained value without + # either `COPY` or a `RETURNS x:T` lifetime annotation is a UAF the moment + # the caller drops the source. + sig { params(lookup_arg: T.nilable(Proc), lookup_block: T.untyped).returns(T::Boolean) } + def self_contained?(lookup_arg = nil, &lookup_block) + return true if primitive? + # Pool Id handles are u64 indices — self-contained. + return true if generic_instance? && generic_base == :Id + + # Strings and slices: the header copies but the data is owned elsewhere. + return false if string? + return false if array? + + # Heap/sync wrappers: not even value-Copy, certainly not self-contained. + return false if heap? + return false if multiowned? || shared? || any_sync? + return false if list_collection? || pool? || set_collection? || map? + return false if respond_to?(:indirect?) && indirect? + + if lookup_arg || lookup_block + resolver = lookup_arg || lookup_block + schema = resolver.is_a?(Proc) ? resolver.call(resolved) : (resolver[resolved] rescue nil) + if schema.nil? && generic_instance? + schema = resolver.is_a?(Proc) ? resolver.call(generic_base) : (resolver[generic_base] rescue nil) + end + + # Enums are tag-only — self-contained. + return true if schema.is_a?(Hash) && schema[:kind] == :enum + + # Unions: self-contained iff every variant payload is self-contained. + if schema.is_a?(Hash) && schema[:kind] == :union + variants = schema[:variants] || {} + return variants.all? do |_, vt| + next true if vt.nil? + next Type.new(vt).self_contained?(resolver) if vt.is_a?(Symbol) || vt.is_a?(Type) + if vt.is_a?(Hash) + fields = vt[:fields] || vt + next fields.all? { |k, ft| + next true if k.is_a?(Symbol) + t = ft.is_a?(Type) ? ft : (ft.is_a?(Hash) ? Type.new(ft[:type] || :Any) : Type.new(ft || :Any)) + t.self_contained?(resolver) + } + end + false + end + end + + # Structs: self-contained iff every field is self-contained. + if schema.is_a?(Hash) && !schema[:kind] + return schema.all? do |k, v| + next true if k.is_a?(Symbol) # metadata (:type_params etc.) + ft = v.is_a?(Type) ? v : (v.is_a?(Hash) ? Type.new(v[:type] || :Any) : Type.new(v || :Any)) + ft.self_contained?(resolver) + end + end + end + + false + end + # ── Recursive type analysis (mirrors Zig comptime functions) ────── # Mirror of Zig's needsPromotion. Returns true if this type contains diff --git a/transpile-tests/77_error_snapshot.cht b/transpile-tests/77_error_snapshot.cht index dc06892cd..491f55662 100644 --- a/transpile-tests/77_error_snapshot.cht +++ b/transpile-tests/77_error_snapshot.cht @@ -11,11 +11,13 @@ END FN processUser(u: User) RETURNS !String -> valid = u |> validateUser; - RETURN valid.name; + name = COPY valid.name; + RETURN name; CATCH Input # snapshot is the User that failed validation - RETURN snapshot.name; + snapname = COPY snapshot.name; + RETURN snapname; END FN doublePositive(x: Int64) RETURNS !Int64 -> diff --git a/transpile-tests/78_snapshot_ambiguous.cht b/transpile-tests/78_snapshot_ambiguous.cht index 24b612802..52bc02360 100644 --- a/transpile-tests/78_snapshot_ambiguous.cht +++ b/transpile-tests/78_snapshot_ambiguous.cht @@ -19,7 +19,8 @@ END FN processBoth(u: User, o: Order) RETURNS !String -> validUser = u |> checkUser; validOrder = o |> checkOrder; - RETURN validUser.name; + name = COPY validUser.name; + RETURN name; CATCH Input # NOT accessing snapshot here (would be ambiguous: User or Order?) From 33fe5177959a8659d6dfccc26a3be28258b15932 Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Sun, 10 May 2026 15:49:14 +0000 Subject: [PATCH 21/21] fix(mir): skip DupeSlice wrap when value is already heap-allocating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `lower_return` for catch_string_dupe_ret functions wraps every return value in `MIR::DupeSlice.new(value, :heap)` to ensure both success and error paths return heap-backed strings (consistent caller cleanup contract). That's correct for frame-allocated values (literals, slice-of-param, frame concats) — the wrap promotes them to heap. But when `value` is itself already heap-allocating (`MIR::DeepCopy` from `RETURN COPY x.field`, `MIR::ConcatStr` from a heap-promoted concat, etc.), the wrap NESTS the inner alloc inside DupeSlice. The MIR checker walks the Let.init with allow_top: true and recurses with allow_top: false; the nested DeepCopy lands in non-Let-init position and trips UNHOISTED_ALLOC. Surfaced by `RETURN COPY validUser.name` patterns in caller_cleanup spec, transpile-tests/77, and transpile-tests/78 — those were originally fixed with `name = COPY x.name; RETURN name;` workarounds to sidestep this lowering bug. With this fix the cleaner form works directly. Fix: gate the DupeSlice wrap on `mir_allocates?(value)`. If the value is already heap-allocating, hoist it as-is. Otherwise wrap in DupeSlice as before. Verified: 4742 examples / 0 failures / 3 pending. Sorbet clean. All transpile-tests/*.cht produce valid Zig. MAL still passes 75/75. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/mir/mir_lowering.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mir/mir_lowering.rb b/src/mir/mir_lowering.rb index 65f75f70c..d65e55bda 100644 --- a/src/mir/mir_lowering.rb +++ b/src/mir/mir_lowering.rb @@ -7193,9 +7193,17 @@ def lower_return(node) elsif needs_string_dupe && value ret_type = node.value.full_type ? Type.new(node.value.full_type) : nil if ret_type&.string? + # If `value` is already a heap-allocating expression (e.g. MIR::DeepCopy + # from `RETURN COPY x.field`, or MIR::ConcatStr from a heap-promoted + # concat), skip the DupeSlice wrap — it would nest the inner alloc + # under DupeSlice, putting it in non-Let-init position and tripping + # UNHOISTED_ALLOC. Just hoist the existing alloc directly. For non- + # allocating values (string literals, frame slices, slice-of-param), + # wrap in DupeSlice so the catch's heap contract still holds. + init = mir_allocates?(value) ? value : MIR::DupeSlice.new(value, :heap) MIR::ScopeBlock.new([ MIR::AllocMark.new("__ret_dupe", :heap, nil), - MIR::Let.new("__ret_dupe", MIR::DupeSlice.new(value, :heap), false, nil, nil), + MIR::Let.new("__ret_dupe", init, false, nil, nil), MIR::ErrCleanup.new("__ret_dupe", { kind: :heap_string, alloc: :heap, has_moved_guard: false }), MIR::ReturnStmt.new(MIR::Ident.new("__ret_dupe")) ])