diff --git a/src/mcp.zig b/src/mcp.zig index 94a4168..404c61b 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -494,7 +494,7 @@ pub const Tool = enum { codedb_ls, }; -const tools_list = +pub const tools_list = \\{"tools":[ \\{"name":"codedb_tree","description":"Whole-repo file tree with per-file language, line counts, and symbol counts. Use to orient in an unfamiliar project.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_outline","description":"Symbol outline of one file: functions, structs, enums, imports, consts with line numbers. 4-15x smaller than reading the raw file. Run before codedb_read to find the lines you actually need.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, @@ -509,7 +509,7 @@ const tools_list = \\{"name":"codedb_changes","description":"Files changed since a given sequence number. Pair with codedb_status to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, \\{"name":"codedb_status","description":"Current indexed-file count, sequence number, and scan phase.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_snapshot","description":"Pre-rendered JSON snapshot of the entire index — tree, outlines, symbols, deps. 
For caching or shipping to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_bundle","description":"Run up to 20 codedb_* calls in one round-trip. Each op is either MCP-style {\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}} or inline {\"tool\":\"codedb_search\",\"query\":\"Agent\"} — both are accepted. Example: {\"ops\":[{\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}},{\"tool\":\"codedb_outline\",\"arguments\":{\"path\":\"src/main.zig\"}}]}. Best for parallel outline/symbol/search; avoid bundling large codedb_read calls — responses are not size-capped. If a sub-op reports `received keys: []`, the wrapper field is misnamed: use `arguments` (MCP spec), not `args`.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","description":"Sub-tool calls to dispatch (max 20). Each item must have `tool`; pass per-op args either nested under `arguments` (MCP shape) or inline alongside `tool`.","items":{"type":"object","properties":{"tool":{"type":"string","description":"codedb_* tool name to invoke (e.g. codedb_outline, codedb_symbol, codedb_search, codedb_word, codedb_callers, codedb_read, codedb_deps, codedb_tree, codedb_hot, codedb_status, codedb_changes). Required."},"arguments":{"type":"object","description":"Per-call args matching that tool's inputSchema. The field MUST be named `arguments` (MCP `tools/call` convention) — `args` is silently ignored. May be omitted if you supply args inline at the op level instead."}},"required":["tool"]}},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}}, + \\{"name":"codedb_bundle","description":"Run up to 20 codedb_* calls in one round-trip. 
Each op is either MCP-style {\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}} or inline {\"tool\":\"codedb_search\",\"query\":\"Agent\"} — both are accepted. Example: {\"ops\":[{\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}},{\"tool\":\"codedb_outline\",\"arguments\":{\"path\":\"src/main.zig\"}}]}. Best for parallel outline/symbol/search; avoid bundling large codedb_read calls — responses are not size-capped. If a sub-op reports `received keys: []`, the wrapper field is misnamed: use `arguments` (MCP spec), not `args`.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","description":"Sub-tool calls to dispatch (max 20). Each item must have `tool` AND `arguments` (pass `{}` if the sub-tool takes none). Inline args alongside `tool` are still accepted as a fallback.","items":{"type":"object","properties":{"tool":{"type":"string","description":"codedb_* tool name to invoke (e.g. codedb_outline, codedb_symbol, codedb_search, codedb_word, codedb_callers, codedb_read, codedb_deps, codedb_tree, codedb_hot, codedb_status, codedb_changes). Required."},"arguments":{"type":"object","description":"Per-call args matching that tool's inputSchema. Field MUST be named `arguments` (MCP `tools/call` convention) — `args` is silently ignored. Pass `{}` only if the sub-tool takes no arguments. Required."}},"required":["tool","arguments"]}},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}}, \\{"name":"codedb_remote","description":"Query indexed public repos via api.wiki.codes. Pass action= one of: tree, outline, search, read, symbol, deps, score, cves, commits, branches, dep-history, policy, actions. Use action=actions first if unsure what a repo supports.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. 
vercel/next.js) or a raw wiki slug such as chromium."},"action":{"type":"string","enum":["tree","outline","search","read","actions","symbol","policy","deps","score","cves","commits","branches","dep-history"],"description":"What to query from api.wiki.codes: actions, tree, search, outline, read, symbol, policy, deps, score, cves, commits, branches, dep-history."},"query":{"type":"string","description":"Action-specific argument. search: text query. symbol: identifier name. outline: file path."},"path":{"type":"string","description":"For action=read: the file path to fetch."},"lines":{"type":"string","description":"For action=read: line range like '10-60' (1-indexed, inclusive). Omit for full file."},"limit":{"type":"integer","description":"For search/tree/deps/commits/branches/dep-history: cap the number of items returned (server may enforce its own ceiling)."},"offset":{"type":"integer","description":"For tree/deps/commits/branches/dep-history: skip the first N items (pagination)."},"prefix":{"type":"string","description":"For tree: only return paths starting with this prefix (e.g. 'src/')."},"expand":{"type":"boolean","description":"For tree: when true, return the full file list. When false returns a compact directory summary when supported."},"since":{"type":"string","description":"For commits/dep-history: ISO timestamp or commit SHA to start from."},"scope":{"type":"string","enum":["runtime","all"],"description":"For score/cves only. Defaults to runtime; use all to include dev/tooling dependencies."},"backend":{"type":"string","enum":["wiki"],"description":"Deprecated compatibility field. Only 'wiki' is accepted; requests always use api.wiki.codes."}},"required":["repo","action"]}}, \\{"name":"codedb_projects","description":"List every locally indexed project on this machine: path, data-dir hash, snapshot presence.","inputSchema":{"type":"object","properties":{},"required":[]}}, \\{"name":"codedb_index","description":"Index a local FOLDER (not a file). 
/// Build the augmented `tools/list` payload: the static `tools_list` JSON with
/// a discriminated `oneOf` added to the `codedb_bundle` ops `items` schema.
///
/// Each `oneOf` branch pins `tool` to a `const` sub-tool name and binds
/// `arguments` to that sub-tool's own `inputSchema`, so a model emitting a
/// bundle call has to populate `arguments` with the keys of whichever
/// sub-tool it picked. (Stage 2 of issue #437; Stage 1 in #434 added
/// `arguments` to items.required.)
///
/// codedb_bundle (recursive — rejected at handleBundle) and codedb_edit
/// (write op — rejected at handleBundle) are excluded from the oneOf.
///
/// Errors:
/// - `error.MalformedToolsList`: the root is not an object, `tools` is
///   missing or not an array, or the bundle `items` schema is not an object.
/// - `error.BundleNotFound`: no `codedb_bundle` entry exposes
///   `inputSchema.properties.ops.items`.
/// - JSON parse errors and `error.OutOfMemory` are propagated unchanged.
///
/// Caller owns the returned slice and must free it with `alloc`. All
/// intermediate state lives in a scratch arena that is released before return.
pub fn buildAugmentedToolsList(alloc: std.mem.Allocator) ![]u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const a = arena.allocator();

    // The arena owns every parser allocation, so the leaky variant is the
    // right fit: no `Parsed` wrapper (with its own nested arena) to manage.
    const root = try std.json.parseFromSliceLeaky(std.json.Value, a, tools_list, .{});
    // Check the tag before touching `.object`; reading an inactive union
    // field is illegal behavior rather than a reportable error.
    if (root != .object) return error.MalformedToolsList;

    const tools_val = root.object.getPtr("tools") orelse return error.MalformedToolsList;
    if (tools_val.* != .array) return error.MalformedToolsList;
    const tools = tools_val.array.items;

    // Locate the `items` schema of codedb_bundle's `ops` array. Entries that
    // do not have the expected shape are skipped rather than treated as fatal.
    const bundle_items: *std.json.Value = find: {
        for (tools) |*tool| {
            if (tool.* != .object) continue;
            const name = tool.object.get("name") orelse continue;
            if (name != .string) continue;
            if (!std.mem.eql(u8, name.string, "codedb_bundle")) continue;

            const schema = tool.object.getPtr("inputSchema") orelse continue;
            if (schema.* != .object) continue;
            const props = schema.object.getPtr("properties") orelse continue;
            if (props.* != .object) continue;
            const ops = props.object.getPtr("ops") orelse continue;
            if (ops.* != .object) continue;
            if (ops.object.getPtr("items")) |items| break :find items;
        }
        return error.BundleNotFound;
    };
    if (bundle_items.* != .object) return error.MalformedToolsList;

    // One branch per dispatchable sub-tool:
    //   {"properties":{"tool":{"const":<name>},"arguments":<inputSchema>}}
    var one_of: std.json.Array = .init(a);
    for (tools) |tool| {
        if (tool != .object) continue;
        const name_val = tool.object.get("name") orelse continue;
        if (name_val != .string) continue;
        const sub_name = name_val.string;
        if (std.mem.eql(u8, sub_name, "codedb_bundle")) continue;
        if (std.mem.eql(u8, sub_name, "codedb_edit")) continue;
        // Shallow copy: the branch shares the sub-tool's schema nodes, which
        // is fine because the tree is only serialized, never mutated again.
        const sub_schema = tool.object.get("inputSchema") orelse continue;

        var tool_const: std.json.ObjectMap = .{};
        try tool_const.put(a, "const", .{ .string = sub_name });

        var branch_props: std.json.ObjectMap = .{};
        try branch_props.put(a, "tool", .{ .object = tool_const });
        try branch_props.put(a, "arguments", sub_schema);

        var branch: std.json.ObjectMap = .{};
        try branch.put(a, "properties", .{ .object = branch_props });

        try one_of.append(.{ .object = branch });
    }

    // Only the bundle `items` map grows here; `bundle_items` points into the
    // parent `ops` map, which is not modified, so the pointer stays valid.
    try bundle_items.object.put(a, "oneOf", .{ .array = one_of });

    // Serialize straight into the caller's allocator instead of rendering
    // into the arena and duplicating the bytes afterwards.
    return std.json.Stringify.valueAlloc(alloc, root, .{});
}
test "issue-434: codedb_bundle ops items schema requires arguments field" {
    // Stage 1 of the bundle-schema fix. The ops `items` schema used to list
    // only `tool` as required, with `arguments` a bare {type: "object"}.
    // Function-calling LLMs treat the schema as authoritative and emit the
    // minimum-valid payload, which misroutes through the inline-args fallback
    // and surfaces as "received keys: [tool, arguments]" from each sub-tool.
    // This test pins that both `tool` and `arguments` are in items.required.
    // (Stage 2 — discriminated oneOf over tool — is covered separately.)
    const doc = try std.json.parseFromSlice(std.json.Value, testing.allocator, mcp_mod.tools_list, .{});
    defer doc.deinit();

    // Pick out the inputSchema of the codedb_bundle entry.
    var bundle_schema: ?std.json.Value = null;
    for (doc.value.object.get("tools").?.array.items) |entry| {
        if (!std.mem.eql(u8, entry.object.get("name").?.string, "codedb_bundle")) continue;
        bundle_schema = entry.object.get("inputSchema").?;
        break;
    }
    try testing.expect(bundle_schema != null);

    const schema_props = bundle_schema.?.object.get("properties").?.object;
    const items_schema = schema_props.get("ops").?.object.get("items").?;
    const required_keys = items_schema.object.get("required").?.array.items;

    var saw_tool = false;
    var saw_arguments = false;
    for (required_keys) |key| {
        saw_tool = saw_tool or std.mem.eql(u8, key.string, "tool");
        saw_arguments = saw_arguments or std.mem.eql(u8, key.string, "arguments");
    }
    try testing.expect(saw_tool);
    try testing.expect(saw_arguments);
}

test "issue-437: codedb_bundle ops items schema has discriminated oneOf per sub-tool" {
    // Stage 2 of the bundle-schema fix. Stage 1 (#434) made `arguments`
    // required but left it as a bare {type: "object"}, so `arguments: {}`
    // still satisfies the schema. Stage 2 ties the contents of `arguments`
    // to each sub-tool's inputSchema through a oneOf discriminated on a
    // const `tool` name. The augmented payload is derived at runtime from
    // the schemas already present in tools_list, so nothing is duplicated
    // by hand.
    const payload = try mcp_mod.buildAugmentedToolsList(testing.allocator);
    defer testing.allocator.free(payload);

    const doc = try std.json.parseFromSlice(std.json.Value, testing.allocator, payload, .{});
    defer doc.deinit();

    // Walk down to codedb_bundle's ops.items schema.
    var bundle_items: ?std.json.Value = null;
    for (doc.value.object.get("tools").?.array.items) |entry| {
        if (!std.mem.eql(u8, entry.object.get("name").?.string, "codedb_bundle")) continue;
        const input_schema = entry.object.get("inputSchema").?;
        const ops_schema = input_schema.object.get("properties").?.object.get("ops").?;
        bundle_items = ops_schema.object.get("items").?;
        break;
    }
    try testing.expect(bundle_items != null);

    // The items schema must carry a `oneOf` array.
    const one_of_val = bundle_items.?.object.get("oneOf");
    try testing.expect(one_of_val != null);
    const branches = one_of_val.?.array.items;

    // At least one branch per dispatchable codedb_* sub-tool; codedb_bundle
    // (recursive) and codedb_edit (write op) are deliberately left out.
    try testing.expect(branches.len >= 10);

    // The codedb_outline branch must pin `tool` to a const and bind
    // `arguments` to a populated schema in which `path` is required.
    var found_outline = false;
    for (branches) |branch| {
        const branch_props = branch.object.get("properties").?.object;
        const pinned = branch_props.get("tool").?.object.get("const") orelse continue;
        if (!std.mem.eql(u8, pinned.string, "codedb_outline")) continue;
        found_outline = true;

        const args_schema = branch_props.get("arguments").?;
        const args_props = args_schema.object.get("properties").?.object;
        try testing.expect(args_props.get("path") != null);

        var path_required = false;
        for (args_schema.object.get("required").?.array.items) |key| {
            if (std.mem.eql(u8, key.string, "path")) path_required = true;
        }
        try testing.expect(path_required);
        break;
    }
    try testing.expect(found_outline);

    // Neither excluded tool may appear as a branch discriminator.
    for (branches) |branch| {
        const branch_props = branch.object.get("properties").?.object;
        const pinned = branch_props.get("tool").?.object.get("const") orelse continue;
        try testing.expect(!std.mem.eql(u8, pinned.string, "codedb_bundle"));
        try testing.expect(!std.mem.eql(u8, pinned.string, "codedb_edit"));
    }
}