From a3116f7e831d5b0b355bcacea8a109384d7d37e1 Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Tue, 12 May 2026 14:55:57 -0300 Subject: [PATCH 1/2] feat(mcp): resolve local-element declarations on top-level miss A real agent calling ooxml_attributes for w:cs, w:rtl, w:lang, w:dir, w:bdo - all elements that show up in real .docx files - got Not found. Reason: those elements are declared inline inside EG_RPrBase / EG_ContentRunContent groups in the WML XSD, not as top-level xsd:elements, so the global-only lookup misses them. The agent had to fall back to prose search and reconstruct an answer the schema graph actually has. ooxml_attributes / ooxml_children / ooxml_element now fall back through local-element declarations when the top-level lookup misses: - Search xsd_symbols for local elements with the same local name in the same namespace (parent_symbol_id IS NOT NULL). - If exactly one local declaration exists, or all matching declarations share the same type_ref, follow that type and return the report. The header surfaces "resolved via local element in , type X" so the agent understands the indirection. - If multiple declarations have different type_refs (the tblGrid case), return a disambiguation list - never a guess. - For ooxml_element specifically, return a local-element report ("## Local element: X") rather than pretending it's a global Element. Same-namespace local resolution suppresses the cross-vocab did-you-mean that PR 1 introduced. The trace that motivated this work showed w:cs surfacing r:cs (a relationship attribute) as a suggestion - technically true but unhelpful. Local resolution wins; cross-vocab stays as the last-resort fallback only when no same-namespace local exists. No DB migration: parent_symbol_id and type_ref were already populated during ingest. This is a query/dispatch enhancement. Tests cover both layers: - Helper level (fixture XSDs): findLocalElementsInNamespace returns scoped results, excludes top-level, reports parent kind and name, preserves per-parent type_refs for the ambiguous case. - Dispatch level: a new fixture (EG_LocalCase containing local_para typed CT_Para) exercises the single-match resolution end to end, asserting "resolved via local element" headers and the expected attributes. The ambiguous-shared fixture confirms disambiguation without guessing. - Real-cache acceptance (gated on the full Transitional bundle): w:cs / w:rtl resolve to CT_OnOff.val; w:lang to CT_Language with val, eastAsia, bidi; w:dir / w:bdo to their respective types' val attribute; w:cs no longer leads with the cross-vocab r:cs hint. --- apps/mcp-server/src/ooxml-queries.ts | 71 +++++++++++ apps/mcp-server/src/ooxml-tools.ts | 163 ++++++++++++++++++++++++- tests/ingest-xsd/fixtures/main.xsd | 9 ++ tests/ingest-xsd/ingest.test.ts | 67 +++++++++- tests/ingest-xsd/parse-schema.test.ts | 4 +- tests/mcp-server/ooxml-queries.test.ts | 113 +++++++++++++++++ 6 files changed, 415 insertions(+), 12 deletions(-) diff --git a/apps/mcp-server/src/ooxml-queries.ts b/apps/mcp-server/src/ooxml-queries.ts index 90e59ac..a81f9dd 100644 --- a/apps/mcp-server/src/ooxml-queries.ts +++ b/apps/mcp-server/src/ooxml-queries.ts @@ -727,6 +727,25 @@ export interface LocalNameHit { namespaceUri: string; } +/** + * A local element declaration found inside a parent group / complexType / + * other owning symbol. Returned by `findLocalElementsInNamespace` and used + * by the attribute/children/element dispatchers to fall back from a + * missed top-level lookup to the inline declaration. + */ +export interface LocalElementHit { + id: number; + localName: string; + /** Clark-form type ref, e.g. `{ns}CT_OnOff`. May be null for inline complexType. */ + typeRef: string | null; + parentId: number; + parentLocalName: string; + /** `group`, `complexType`, or `element` (rare). */ + parentKind: string; + vocabularyId: string; + namespaceUri: string; +} + /** * Find top-level symbols with this local name across all namespaces in a * profile. Used to power "did you mean?" suggestions when an exact lookup @@ -776,3 +795,55 @@ export async function findLocalNameAcrossNamespaces( namespaceUri: r.namespace_uri as string, })); } + +/** + * Find local-element declarations (xsd:element name="X" declared inline + * inside a complexType, group, or another element) for a given local + * name in a single namespace. Used as a fallback when a top-level + * `lookupElement` misses: many OOXML elements an agent encounters in + * real documents (e.g. `w:cs`, `w:lang` inside `EG_RPrBase`) are local + * and so have no global qname identity, but their owning group/type + * does, and that's all we need to resolve attributes / children. + * + * Scoped by namespace on purpose: surfacing local elements from other + * vocabularies would noise up the result without helping the agent who + * just typed `w:cs`. + * + * Returns rows in their canonical order (by parent kind, then parent + * name) so callers can present "first hit" deterministically. + */ +export async function findLocalElementsInNamespace( + sql: Sql, + localName: string, + namespace: string, + profile: string, +): Promise { + const rows = await sql` + SELECT s.id, s.local_name, s.type_ref, + s.parent_symbol_id AS parent_id, + parent.local_name AS parent_local_name, + parent.kind AS parent_kind, + s.vocabulary_id, ns.uri AS namespace_uri + FROM xsd_symbols s + JOIN xsd_symbols parent ON parent.id = s.parent_symbol_id + JOIN xsd_symbol_profiles sp ON sp.symbol_id = s.id + JOIN xsd_namespaces ns ON ns.id = sp.namespace_id + JOIN xsd_profiles p ON p.id = sp.profile_id + WHERE s.local_name = ${localName} + AND s.kind = 'element' + AND s.parent_symbol_id IS NOT NULL + AND ns.uri = ${namespace} + AND p.name = ${profile} + ORDER BY parent.kind, parent.local_name + `; + return rows.map((r: Record) => ({ + id: r.id as number, + localName: r.local_name as string, + typeRef: r.type_ref as string | null, + parentId: r.parent_id as number, + parentLocalName: r.parent_local_name as string, + parentKind: r.parent_kind as string, + vocabularyId: r.vocabulary_id as string, + namespaceUri: r.namespace_uri as string, + })); +} diff --git a/apps/mcp-server/src/ooxml-tools.ts b/apps/mcp-server/src/ooxml-tools.ts index 47825ec..d67565d 100644 --- a/apps/mcp-server/src/ooxml-tools.ts +++ b/apps/mcp-server/src/ooxml-tools.ts @@ -15,12 +15,14 @@ import { type AttrEntry, type ChildEdge, type EnumEntry, + findLocalElementsInNamespace, findLocalNameAcrossNamespaces, getAttributes, getChildren, getEnums, getNamespaceInfo, knownPrefixes, + type LocalElementHit, type LocalNameHit, listNamespaces, lookupElement, @@ -216,6 +218,20 @@ export async function runOoxmlTool( if (!q.ok) return formatNotFound(`could not parse qname: ${q.reason}`); const hit = await lookupElement(sql, q.qname.namespace, q.qname.localName, profile); if (!hit) { + // No global element. Look for local-element declarations in the + // same namespace; they're how OOXML expresses things like + // w:cs, w:lang, w:bdo etc. and the report can name the owning + // group/complexType plus the declared type honestly without + // pretending it's a top-level Element. + const locals = await findLocalElementsInNamespace( + sql, + q.qname.localName, + q.qname.namespace, + profile, + ); + if (locals.length > 0) { + return formatLocalElementReport(q.qname, locals, profile); + } const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile, { kind: "element", }); @@ -264,6 +280,20 @@ export async function runOoxmlTool( } } if (!typeSym) { + // Top-level lookup missed. Many elements an agent sees in real + // .docx (w:cs, w:rtl, w:lang, w:dir, w:bdo, ...) are declared + // inline inside groups / complexTypes; resolve through that + // before falling back to cross-vocab did-you-mean. + const local = await resolveLocalElement(sql, q.qname, profile); + if (local.kind === "single") { + const children = await getChildren(sql, local.typeSym.id, profile); + return formatChildrenReport(null, local.typeSym, children, profile, { + resolvedFromLocal: local.first, + }); + } + if (local.kind === "ambiguous") { + return formatLocalElementAmbiguous(q.qname, local.locals); + } const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile); return formatNotFound( `children for ${q.qname.localName} in namespace ${q.qname.namespace}`, @@ -287,6 +317,16 @@ export async function runOoxmlTool( } } if (!typeSym) { + const local = await resolveLocalElement(sql, q.qname, profile); + if (local.kind === "single") { + const attrs = await getAttributes(sql, local.typeSym.id, profile); + return formatAttributesReport(null, local.typeSym, attrs, profile, { + resolvedFromLocal: local.first, + }); + } + if (local.kind === "ambiguous") { + return formatLocalElementAmbiguous(q.qname, local.locals); + } const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile); return formatNotFound( `attributes for ${q.qname.localName} in namespace ${q.qname.namespace}`, @@ -414,13 +454,23 @@ function formatChildrenReport( type: SymbolHit, children: ChildEdge[], profile: string, + opts: { resolvedFromLocal?: LocalElementHit } = {}, ): string { const lines: string[] = []; - const heading = element - ? `Children of ${element.localName} (via type ${type.localName})` - : `Children of ${type.localName}`; + const local = opts.resolvedFromLocal; + const heading = local + ? `Children of ${local.localName} (resolved via local element in ${local.parentKind} ${local.parentLocalName}, type ${type.localName})` + : element + ? `Children of ${element.localName} (via type ${type.localName})` + : `Children of ${type.localName}`; lines.push(`## ${heading}`); lines.push(""); + if (local) { + lines.push( + `_\`${local.localName}\` has no top-level qname; it's a local element declared in ${local.parentKind} \`${local.parentLocalName}\`. Children come from its declared type \`${type.localName}\`._`, + ); + lines.push(""); + } lines.push(`- profile: ${profile}`); lines.push(`- type vocabulary: ${type.vocabularyId}`); lines.push(`- type namespace: ${type.namespaceUri}`); @@ -454,13 +504,23 @@ function formatAttributesReport( type: SymbolHit, attrs: AttrEntry[], profile: string, + opts: { resolvedFromLocal?: LocalElementHit } = {}, ): string { const lines: string[] = []; - const heading = element - ? `Attributes of ${element.localName} (via type ${type.localName})` - : `Attributes of ${type.localName}`; + const local = opts.resolvedFromLocal; + const heading = local + ? `Attributes of ${local.localName} (resolved via local element in ${local.parentKind} ${local.parentLocalName}, type ${type.localName})` + : element + ? `Attributes of ${element.localName} (via type ${type.localName})` + : `Attributes of ${type.localName}`; lines.push(`## ${heading}`); lines.push(""); + if (local) { + lines.push( + `_\`${local.localName}\` has no top-level qname; it's a local element declared in ${local.parentKind} \`${local.parentLocalName}\`. Attributes come from its declared type \`${type.localName}\`._`, + ); + lines.push(""); + } lines.push(`- profile: ${profile}`); lines.push(`- type vocabulary: ${type.vocabularyId}`); if (type.sourceName) lines.push(`- source: ${type.sourceName}`); @@ -664,3 +724,94 @@ function formatPackagePartNotFound( ); return lines.join("\n"); } + +// --- Local element resolution ------------------------------------------ + +type LocalResolution = + | { kind: "single"; typeSym: SymbolHit; first: LocalElementHit; locals: LocalElementHit[] } + | { kind: "ambiguous"; locals: LocalElementHit[] } + | { kind: "none" }; + +/** + * Try to resolve a missed top-level qname through local element declarations. + * + * Rules: + * - 0 hits → none. Caller falls back to cross-vocab did-you-mean. + * - >=1 hits, all with the same non-null type_ref that resolves → single. + * The first local hit is preserved for parent-context display. + * - >=1 hits but the type_refs disagree (or none resolve) → ambiguous. + * We refuse to guess and let the caller render a disambiguation list. + */ +async function resolveLocalElement( + sql: Sql, + qname: { namespace: string; localName: string }, + profile: string, +): Promise { + const locals = await findLocalElementsInNamespace(sql, qname.localName, qname.namespace, profile); + if (locals.length === 0) return { kind: "none" }; + + const typeRefs = new Set(locals.map((l) => l.typeRef).filter((t): t is string => !!t)); + if (typeRefs.size === 1) { + const [typeRef] = typeRefs; + const typeSym = await lookupSymbolByTypeRef(sql, typeRef, profile); + if (typeSym) { + return { kind: "single", typeSym, first: locals[0], locals }; + } + } + // Either multiple distinct type_refs, or the single one didn't resolve + // (rare; would mean a dangling reference). Either way, surface the + // declarations and let the caller decide. + return { kind: "ambiguous", locals }; +} + +function formatLocalElementAmbiguous( + qname: { namespace: string; localName: string }, + locals: LocalElementHit[], +): string { + const lines: string[] = []; + lines.push(`## Ambiguous local element \`${qname.localName}\` in namespace ${qname.namespace}`); + lines.push(""); + lines.push( + `\`${qname.localName}\` is declared inline in multiple places with different types; no single answer to return.`, + ); + lines.push(""); + lines.push("| owner | owner kind | type_ref |"); + lines.push("| --- | --- | --- |"); + for (const l of locals) { + lines.push(`| \`${l.parentLocalName}\` | ${l.parentKind} | ${l.typeRef ?? "_(none)_"} |`); + } + lines.push(""); + lines.push( + "Resolve the parent owner (e.g. `ooxml_attributes` on the group/complexType) or pass the desired type directly.", + ); + return lines.join("\n"); +} + +function formatLocalElementReport( + qname: { namespace: string; localName: string }, + locals: LocalElementHit[], + profile: string, +): string { + const lines: string[] = []; + const first = locals[0]; + lines.push(`## Local element: ${first.localName}`); + lines.push(""); + lines.push( + `_\`${first.localName}\` has no top-level qname in this namespace. It's declared inline inside ${first.parentKind} \`${first.parentLocalName}\`. Call \`ooxml_attributes\` or \`ooxml_children\` with the same qname to follow its type._`, + ); + lines.push(""); + lines.push(`- profile: ${profile}`); + lines.push(`- namespace: ${qname.namespace}`); + lines.push(`- vocabulary: ${first.vocabularyId}`); + if (first.typeRef) lines.push(`- type_ref: ${first.typeRef}`); + lines.push(""); + if (locals.length > 1) { + lines.push(`Also declared in ${locals.length - 1} other local context(s):`); + for (const l of locals.slice(1)) { + lines.push( + `- ${l.parentKind} \`${l.parentLocalName}\` (type_ref: ${l.typeRef ?? "_(none)_"})`, + ); + } + } + return lines.join("\n"); +} diff --git a/tests/ingest-xsd/fixtures/main.xsd b/tests/ingest-xsd/fixtures/main.xsd index 56a3547..523dcd7 100644 --- a/tests/ingest-xsd/fixtures/main.xsd +++ b/tests/ingest-xsd/fixtures/main.xsd @@ -108,6 +108,15 @@ + + + + + + diff --git a/tests/ingest-xsd/ingest.test.ts b/tests/ingest-xsd/ingest.test.ts index 7d17dc5..ad772c1 100644 --- a/tests/ingest-xsd/ingest.test.ts +++ b/tests/ingest-xsd/ingest.test.ts @@ -245,17 +245,18 @@ test("ingest writes compositors and child edges for nested content models", asyn // CT_BaseWithChildren: sequence -> [ alpha, beta ] // CT_DerivedExtended: complexContent/extension -> sequence -> [ gamma ] // CT_NestedOrder: sequence -> [ head, choice -> [ branchA, branchB ], tail ] + // EG_LocalCase: choice -> element name="local_para" type="CT_Para" // Compositors: CT_Para(1) + CT_Body(2) + EG_PContent(1) + Base(1) + Derived(1) + - // Nested(2) + OuterA(1) + OuterB(1) = 10 - expect(stats.compositorsInserted).toBe(10); + // Nested(2) + OuterA(1) + OuterB(1) + LocalCase(1) = 11 + expect(stats.compositorsInserted).toBe(11); expect(stats.groupRefsInserted).toBe(1); // Local element symbols are scoped per-owner now, so the two `shared` decls // in CT_OuterA and CT_OuterB count separately. // Per-owner locals: text(CT_Para), break(CT_Body), r(EG_PContent), // alpha+beta(CT_BaseWithChildren), gamma(CT_DerivedExtended), // head+branchA+branchB+tail(CT_NestedOrder), shared(CT_OuterA), - // shared(CT_OuterB) = 12. - expect(stats.localElementsCreated).toBe(12); + // shared(CT_OuterB), local_para(EG_LocalCase) = 13. + expect(stats.localElementsCreated).toBe(13); expect(stats.childEdgesUnresolved).toBe(0); expect(stats.groupRefsUnresolved).toBe(0); @@ -602,3 +603,61 @@ test.skipIf(!fullBundleCacheReady)( }, 60_000, ); + +test.skipIf(!fullBundleCacheReady)( + "ooxml_attributes resolves local elements (w:cs, w:rtl, w:lang, w:dir, w:bdo) against real WML", + async () => { + // Acceptance criteria for the local-element-resolution PR. These five + // elements are declared inline inside EG_RPrBase / EG_ContentRunContent + // in the real WML XSD, not as top-level s. The dispatcher + // must follow the local declaration's type_ref and return the type's + // attributes - with the "resolved via local element" header so agents + // understand what happened. + await ingestSchemaSet({ + schemaDir: REAL_CACHE_DIR, + entrypoints: FULL_BUNDLE_ROOTS, + profileName: "transitional", + sourceName: "ecma-376-transitional", + db, + }); + + const { runOoxmlTool } = await import( + "../../apps/mcp-server/src/ooxml-tools.ts" + ); + + // CT_OnOff cases: val is the only attribute. + for (const qname of ["w:cs", "w:rtl"]) { + const out = await runOoxmlTool("ooxml_attributes", { qname }, db.sql); + expect(out).toContain("resolved via local element"); + expect(out).toContain("type CT_OnOff"); + expect(out).toContain("| val |"); + } + + // CT_Language: val, eastAsia, bidi. + const lang = await runOoxmlTool( + "ooxml_attributes", + { qname: "w:lang" }, + db.sql, + ); + expect(lang).toContain("type CT_Language"); + expect(lang).toContain("| val |"); + expect(lang).toContain("| eastAsia |"); + expect(lang).toContain("| bidi |"); + + // CT_DirContentRun: val. + const dir = await runOoxmlTool("ooxml_attributes", { qname: "w:dir" }, db.sql); + expect(dir).toContain("type CT_DirContentRun"); + expect(dir).toContain("| val |"); + + // CT_BdoContentRun: val. + const bdo = await runOoxmlTool("ooxml_attributes", { qname: "w:bdo" }, db.sql); + expect(bdo).toContain("type CT_BdoContentRun"); + expect(bdo).toContain("| val |"); + + // Suppression: same-namespace local resolution wins, so a cross-vocab + // did-you-mean (e.g. r:cs as a relationship attribute) must NOT lead. + const cs = await runOoxmlTool("ooxml_attributes", { qname: "w:cs" }, db.sql); + expect(cs).not.toContain("Other top-level symbols"); + }, + 90_000, +); diff --git a/tests/ingest-xsd/parse-schema.test.ts b/tests/ingest-xsd/parse-schema.test.ts index 0e4384d..e82a3ed 100644 --- a/tests/ingest-xsd/parse-schema.test.ts +++ b/tests/ingest-xsd/parse-schema.test.ts @@ -85,12 +85,12 @@ test("declarationsByQName indexes all top-level declarations across documents", const set = await parseSchemaSet({ schemaDir: FIXTURES_DIR, entrypoints: ["main.xsd"] }); const counts = countByKind(set.declarationsByQName); - // main.xsd: 1 element, 16 complexType, 1 simpleType, 1 group, 3 attributeGroup + // main.xsd: 1 element, 16 complexType, 1 simpleType, 2 group, 3 attributeGroup // shared.xsd: 2 simpleType, 1 attribute expect(counts.element).toBe(1); expect(counts.complexType).toBe(16); expect(counts.simpleType).toBe(3); - expect(counts.group).toBe(1); + expect(counts.group).toBe(2); expect(counts.attributeGroup).toBe(3); expect(counts.attribute).toBe(1); diff --git a/tests/mcp-server/ooxml-queries.test.ts b/tests/mcp-server/ooxml-queries.test.ts index a84260f..5fc056e 100644 --- a/tests/mcp-server/ooxml-queries.test.ts +++ b/tests/mcp-server/ooxml-queries.test.ts @@ -8,6 +8,7 @@ import { afterAll, beforeAll, expect, test } from "bun:test"; import { createDbClient, type DbClient } from "../../packages/shared/src/db/index.ts"; import { ingestSchemaSet } from "../../scripts/ingest-xsd/ingest.ts"; import { + findLocalElementsInNamespace, findLocalNameAcrossNamespaces, getAttributes, getChildren, @@ -513,3 +514,115 @@ test("ooxml_type: not-found shows 'Other top-level symbols' when only an element expect(out).toContain("Other top-level symbols named `document`"); expect(out).toContain("element `document` in wml-main"); }); + +// --- Local element resolution ------------------------------------------- + +test("findLocalElementsInNamespace: scoped to namespace and excludes top-level", async () => { + // `text` is locally declared inline inside CT_Para. lookupElement filters + // it out (parent_symbol_id IS NULL); this helper returns it with parent + // context so the dispatcher can resolve its declared type. + const hits = await findLocalElementsInNamespace(db.sql, "text", WML_NS, "transitional"); + expect(hits).toHaveLength(1); + expect(hits[0].parentLocalName).toBe("CT_Para"); + expect(hits[0].parentKind).toBe("complexType"); + expect(hits[0].typeRef).toBe("{http://www.w3.org/2001/XMLSchema}string"); + + // Scoped by namespace: `text` is WML-only in fixtures. + const wrongNs = await findLocalElementsInNamespace(db.sql, "text", SHARED_NS, "transitional"); + expect(wrongNs).toHaveLength(0); + + // Top-level elements aren't returned by this helper. + const topLevel = await findLocalElementsInNamespace(db.sql, "document", WML_NS, "transitional"); + expect(topLevel).toHaveLength(0); +}); + +test("findLocalElementsInNamespace: ambiguous case returns one hit per parent with own type_ref", async () => { + // Fixture: `shared` is locally declared in CT_OuterA (type ST_Jc) and + // CT_OuterB (type xsd:string). Both rows must come back with their own + // type_ref so the dispatcher can choose disambiguation over guessing. + const hits = await findLocalElementsInNamespace(db.sql, "shared", WML_NS, "transitional"); + expect(hits).toHaveLength(2); + const byParent = Object.fromEntries(hits.map((h) => [h.parentLocalName, h.typeRef])); + expect(byParent.CT_OuterA).toBe(`{${WML_NS}}ST_Jc`); + expect(byParent.CT_OuterB).toBe("{http://www.w3.org/2001/XMLSchema}string"); +}); + +test("findLocalElementsInNamespace: local element inside a group", async () => { + // Fixture: `local_para` is declared inline inside EG_LocalCase (group), + // typed CT_Para. The dispatcher needs this to resolve attributes / + // children via the declared complexType - the WML w:cs / EG_RPrBase + // shape. + const hits = await findLocalElementsInNamespace(db.sql, "local_para", WML_NS, "transitional"); + expect(hits).toHaveLength(1); + expect(hits[0].parentKind).toBe("group"); + expect(hits[0].parentLocalName).toBe("EG_LocalCase"); + expect(hits[0].typeRef).toBe(`{${WML_NS}}CT_Para`); +}); + +test("ooxml_attributes: resolves through local element to declared type (single match)", async () => { + // w:local_para is a local element typed CT_Para; CT_Para has the `bold` + // attribute. The dispatcher should follow the type_ref and return CT_Para's + // attributes with a "resolved via local element" header. + const out = await runOoxmlTool( + "ooxml_attributes", + { qname: "w:local_para" }, + db.sql, + ); + expect(out).toContain("resolved via local element"); + expect(out).toContain("group `EG_LocalCase`"); + expect(out).toContain("type CT_Para"); + // CT_Para's bold attribute is present in the body. + expect(out).toContain("| bold |"); +}); + +test("ooxml_children: resolves through local element to declared type (single match)", async () => { + // Same w:local_para → CT_Para. CT_Para has the inline `text` element as + // its only child. + const out = await runOoxmlTool( + "ooxml_children", + { qname: "w:local_para" }, + db.sql, + ); + expect(out).toContain("resolved via local element"); + expect(out).toContain("group `EG_LocalCase`"); + expect(out).toContain("| text |"); +}); + +test("ooxml_attributes: ambiguous local declarations return a disambiguation, not a guess", async () => { + // Fixture: `shared` is declared in CT_OuterA (ST_Jc) and CT_OuterB + // (xsd:string). The dispatcher must refuse to pick one and instead list + // both with their owning type, so the agent can drive the next call. + const out = await runOoxmlTool("ooxml_attributes", { qname: "w:shared" }, db.sql); + expect(out).toContain("Ambiguous local element `shared`"); + expect(out).toContain("CT_OuterA"); + expect(out).toContain("CT_OuterB"); + // The disambiguation must not also surface a cross-vocab "Other top-level + // symbols" block - same-namespace local candidates win. + expect(out).not.toContain("Other top-level symbols"); +}); + +test("ooxml_element: returns a local-element report, not a fake global Element", async () => { + // w:local_para has no global Element entry. ooxml_element should report + // it as a local declaration with parent + type_ref, without using the + // "Element:" heading that implies a top-level symbol. + const out = await runOoxmlTool("ooxml_element", { qname: "w:local_para" }, db.sql); + expect(out).toContain("## Local element: local_para"); + expect(out).toContain("group `EG_LocalCase`"); + expect(out).toContain("type_ref:"); + // Should NOT use the global Element heading. + expect(out).not.toContain("## Element: local_para"); +}); + +test("ooxml_attributes: same-namespace local match suppresses cross-vocab did-you-mean", async () => { + // w:local_para resolves through a local declaration. Even if a same-named + // symbol existed in another vocab, the local resolution should win and + // the "Other top-level symbols" did-you-mean block should not appear. + const out = await runOoxmlTool( + "ooxml_attributes", + { qname: "w:local_para" }, + db.sql, + ); + expect(out).not.toContain("Other top-level symbols"); + // And we did get the genuine resolution. + expect(out).toContain("| bold |"); +}); From e98391389d8bb6ffa7c967d0e7447ca248239d6c Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Tue, 12 May 2026 15:15:08 -0300 Subject: [PATCH 2/2] fix(mcp): address PR #8 review findings on local-element resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ooxml_element on an ambiguous local name (e.g. fixture w:shared, declared in CT_OuterA as ST_Jc AND CT_OuterB as xsd:string) previously called formatLocalElementReport which promoted locals[0] as the canonical type and listed the rest under "also declared". That implied a primary answer where none exists. ooxml_element now goes through resolveLocalElement like the other dispatchers: same single/ambiguous policy, ambiguous cases use formatLocalElementAmbiguous (no primary), formatLocalElementReport is reached only when callers proved the locals share one type_ref. - resolveLocalElement filtered null type_refs out before checking for a single value. A mix like [null, CT_X] would collapse to {CT_X} and resolve incorrectly — the inline-typed declaration has its own content model the type symbol can't represent. Now requires every local hit to share the same non-null type_ref before resolving as single; anything else (mixed nulls, multiple non-null values) goes to ambiguous. formatLocalElementReport signature changes to take the resolved (first, locals) explicitly, with a comment documenting the "all share one type_ref" invariant. "Also declared in N other contexts" now drops the per-context type_ref (it's the same as the primary) and says "with the same type" to make the invariant visible to readers. Tests cover the ooxml_element regression: w:shared (the existing fixture for type-disagreement) now exercises the ambiguous path, verifying the single-resolution heading does not appear and no "Also declared in" footer leaks the first hit as canonical. --- apps/mcp-server/src/ooxml-tools.ts | 54 ++++++++++++++------------ tests/mcp-server/ooxml-queries.test.ts | 17 ++++++++ 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/apps/mcp-server/src/ooxml-tools.ts b/apps/mcp-server/src/ooxml-tools.ts index d67565d..8541132 100644 --- a/apps/mcp-server/src/ooxml-tools.ts +++ b/apps/mcp-server/src/ooxml-tools.ts @@ -218,19 +218,16 @@ export async function runOoxmlTool( if (!q.ok) return formatNotFound(`could not parse qname: ${q.reason}`); const hit = await lookupElement(sql, q.qname.namespace, q.qname.localName, profile); if (!hit) { - // No global element. Look for local-element declarations in the - // same namespace; they're how OOXML expresses things like - // w:cs, w:lang, w:bdo etc. and the report can name the owning - // group/complexType plus the declared type honestly without - // pretending it's a top-level Element. - const locals = await findLocalElementsInNamespace( - sql, - q.qname.localName, - q.qname.namespace, - profile, - ); - if (locals.length > 0) { - return formatLocalElementReport(q.qname, locals, profile); + // No global element. Apply the same single/ambiguous policy as + // ooxml_attributes / ooxml_children so an element name with + // disagreeing local declarations (the tblGrid case) doesn't + // promote locals[0] as the answer. + const local = await resolveLocalElement(sql, q.qname, profile); + if (local.kind === "single") { + return formatLocalElementReport(q.qname, local.first, local.locals, profile); + } + if (local.kind === "ambiguous") { + return formatLocalElementAmbiguous(q.qname, local.locals); } const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile, { kind: "element", @@ -750,17 +747,21 @@ async function resolveLocalElement( const locals = await findLocalElementsInNamespace(sql, qname.localName, qname.namespace, profile); if (locals.length === 0) return { kind: "none" }; - const typeRefs = new Set(locals.map((l) => l.typeRef).filter((t): t is string => !!t)); - if (typeRefs.size === 1) { - const [typeRef] = typeRefs; - const typeSym = await lookupSymbolByTypeRef(sql, typeRef, profile); + // "Single" requires every local hit to share the same non-null type_ref. + // A mix of typed and inline-typed (null type_ref) declarations - even if + // the typed ones agree - is genuinely ambiguous: the inline declaration + // has its own content model that the type symbol can't represent. Don't + // silently filter nulls out. + const firstRef = locals[0].typeRef; + if (firstRef && locals.every((l) => l.typeRef === firstRef)) { + const typeSym = await lookupSymbolByTypeRef(sql, firstRef, profile); if (typeSym) { return { kind: "single", typeSym, first: locals[0], locals }; } } - // Either multiple distinct type_refs, or the single one didn't resolve - // (rare; would mean a dangling reference). Either way, surface the - // declarations and let the caller decide. + // Either multiple distinct type_refs, at least one null type_ref alongside + // a typed one, or the single type_ref didn't resolve (dangling). Surface + // every declaration and let the caller pick. return { kind: "ambiguous", locals }; } @@ -789,11 +790,16 @@ function formatLocalElementAmbiguous( function formatLocalElementReport( qname: { namespace: string; localName: string }, + first: LocalElementHit, locals: LocalElementHit[], profile: string, ): string { + // Invariant: callers only reach here via `resolveLocalElement` returning + // `single`, which means every entry in `locals` shares `first.typeRef`. + // The ambiguous case takes a different code path + // (formatLocalElementAmbiguous), so "also declared" here doesn't risk + // implying agreement that doesn't exist. const lines: string[] = []; - const first = locals[0]; lines.push(`## Local element: ${first.localName}`); lines.push(""); lines.push( @@ -806,11 +812,9 @@ function formatLocalElementReport( if (first.typeRef) lines.push(`- type_ref: ${first.typeRef}`); lines.push(""); if (locals.length > 1) { - lines.push(`Also declared in ${locals.length - 1} other local context(s):`); + lines.push(`Also declared in ${locals.length - 1} other local context(s) with the same type:`); for (const l of locals.slice(1)) { - lines.push( - `- ${l.parentKind} \`${l.parentLocalName}\` (type_ref: ${l.typeRef ?? "_(none)_"})`, - ); + lines.push(`- ${l.parentKind} \`${l.parentLocalName}\``); } } return lines.join("\n"); diff --git a/tests/mcp-server/ooxml-queries.test.ts b/tests/mcp-server/ooxml-queries.test.ts index 5fc056e..2ad7469 100644 --- a/tests/mcp-server/ooxml-queries.test.ts +++ b/tests/mcp-server/ooxml-queries.test.ts @@ -613,6 +613,23 @@ test("ooxml_element: returns a local-element report, not a fake global Element", expect(out).not.toContain("## Element: local_para"); }); +test("ooxml_element: ambiguous local declarations use the disambiguation report, not 'primary + alts'", async () => { + // w:shared is locally declared in CT_OuterA (ST_Jc) and CT_OuterB + // (xsd:string). Promoting the first hit as the primary type would mislead + // agents; the dispatcher must instead use the ambiguous report (same + // behavior as ooxml_attributes / ooxml_children) so neither declaration + // is implied to be canonical. + const out = await runOoxmlTool("ooxml_element", { qname: "w:shared" }, db.sql); + expect(out).toContain("Ambiguous local element `shared`"); + expect(out).toContain("CT_OuterA"); + expect(out).toContain("CT_OuterB"); + // Must not render the "Local element: shared" single-resolution heading. + expect(out).not.toContain("## Local element: shared"); + // And no "primary + alts" framing - if one were promoted, that would + // surface as an "Also declared in N other context(s)" footer. + expect(out).not.toContain("Also declared in"); +}); + test("ooxml_attributes: same-namespace local match suppresses cross-vocab did-you-mean", async () => { // w:local_para resolves through a local declaration. Even if a same-named // symbol existed in another vocab, the local resolution should win and