Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions apps/mcp-server/src/ooxml-queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,25 @@ export interface LocalNameHit {
namespaceUri: string;
}

/**
* A local element declaration found inside a parent group / complexType /
* other owning symbol. Returned by `findLocalElementsInNamespace` and used
* by the attribute/children/element dispatchers to fall back from a
* missed top-level lookup to the inline declaration.
*
* Each hit pairs the local element's own symbol row with the row of the
* symbol that owns it, so a caller can both follow `typeRef` and report
* where the declaration lives.
*/
export interface LocalElementHit {
/** Symbol id of the local element declaration itself. */
id: number;
/** Local (unprefixed) name of the element, as matched by the lookup. */
localName: string;
/** Clark-form type ref, e.g. `{ns}CT_OnOff`. May be null for inline complexType. */
typeRef: string | null;
/** Symbol id of the owning (parent) symbol the element is declared in. */
parentId: number;
/** Local name of the owning symbol, e.g. `EG_RPrBase`. */
parentLocalName: string;
/** `group`, `complexType`, or `element` (rare). */
parentKind: string;
/** Vocabulary id recorded on the local element's own symbol row. */
vocabularyId: string;
/** URI of the namespace the declaration was matched in. */
namespaceUri: string;
}

/**
* Find top-level symbols with this local name across all namespaces in a
* profile. Used to power "did you mean?" suggestions when an exact lookup
Expand Down Expand Up @@ -776,3 +795,55 @@ export async function findLocalNameAcrossNamespaces(
namespaceUri: r.namespace_uri as string,
}));
}

/**
 * Find local-element declarations (xsd:element name="X" declared inline
 * inside a complexType, group, or another element) for a given local
 * name in a single namespace. Used as a fallback when a top-level
 * `lookupElement` misses: many OOXML elements an agent encounters in
 * real documents (e.g. `w:cs`, `w:lang` inside `EG_RPrBase`) are local
 * and so have no global qname identity, but their owning group/type
 * does, and that's all we need to resolve attributes / children.
 *
 * Scoped by namespace on purpose: surfacing local elements from other
 * vocabularies would noise up the result without helping the agent who
 * just typed `w:cs`.
 *
 * Returns rows in a canonical order (by parent kind, then parent name,
 * then the declaration's own symbol id) so callers can present the
 * "first hit" deterministically. The trailing `s.id` key matters: without
 * it, two declarations whose owners share kind and name (or one owner
 * declaring the name twice) compare equal and the database may return
 * them in any order. This assumes `xsd_symbols.id` is unique per row.
 *
 * @param sql - Tagged-template SQL client used to run the query.
 * @param localName - Unprefixed element name to look for.
 * @param namespace - Namespace URI the declaration must belong to.
 * @param profile - Profile name the declaration must be registered under.
 * @returns One hit per matching local declaration; empty when none exist.
 */
export async function findLocalElementsInNamespace(
  sql: Sql,
  localName: string,
  namespace: string,
  profile: string,
): Promise<LocalElementHit[]> {
  const rows = await sql`
    SELECT s.id, s.local_name, s.type_ref,
           s.parent_symbol_id AS parent_id,
           parent.local_name AS parent_local_name,
           parent.kind AS parent_kind,
           s.vocabulary_id, ns.uri AS namespace_uri
    FROM xsd_symbols s
    JOIN xsd_symbols parent ON parent.id = s.parent_symbol_id
    JOIN xsd_symbol_profiles sp ON sp.symbol_id = s.id
    JOIN xsd_namespaces ns ON ns.id = sp.namespace_id
    JOIN xsd_profiles p ON p.id = sp.profile_id
    WHERE s.local_name = ${localName}
      AND s.kind = 'element'
      AND s.parent_symbol_id IS NOT NULL
      AND ns.uri = ${namespace}
      AND p.name = ${profile}
    ORDER BY parent.kind, parent.local_name, s.id
  `;
  // Rows come back as loosely typed records; map the snake_case columns
  // onto the camelCase LocalElementHit shape.
  return rows.map((r: Record<string, unknown>) => ({
    id: r.id as number,
    localName: r.local_name as string,
    typeRef: r.type_ref as string | null,
    parentId: r.parent_id as number,
    parentLocalName: r.parent_local_name as string,
    parentKind: r.parent_kind as string,
    vocabularyId: r.vocabulary_id as string,
    namespaceUri: r.namespace_uri as string,
  }));
}
167 changes: 161 additions & 6 deletions apps/mcp-server/src/ooxml-tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ import {
type AttrEntry,
type ChildEdge,
type EnumEntry,
findLocalElementsInNamespace,
findLocalNameAcrossNamespaces,
getAttributes,
getChildren,
getEnums,
getNamespaceInfo,
knownPrefixes,
type LocalElementHit,
type LocalNameHit,
listNamespaces,
lookupElement,
Expand Down Expand Up @@ -216,6 +218,17 @@ export async function runOoxmlTool(
if (!q.ok) return formatNotFound(`could not parse qname: ${q.reason}`);
const hit = await lookupElement(sql, q.qname.namespace, q.qname.localName, profile);
if (!hit) {
// No global element. Apply the same single/ambiguous policy as
// ooxml_attributes / ooxml_children so an element name with
// disagreeing local declarations (the tblGrid case) doesn't
// promote locals[0] as the answer.
const local = await resolveLocalElement(sql, q.qname, profile);
if (local.kind === "single") {
return formatLocalElementReport(q.qname, local.first, local.locals, profile);
}
if (local.kind === "ambiguous") {
return formatLocalElementAmbiguous(q.qname, local.locals);
}
const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile, {
kind: "element",
});
Expand Down Expand Up @@ -264,6 +277,20 @@ export async function runOoxmlTool(
}
}
if (!typeSym) {
// Top-level lookup missed. Many elements an agent sees in real
// .docx (w:cs, w:rtl, w:lang, w:dir, w:bdo, ...) are declared
// inline inside groups / complexTypes; resolve through that
// before falling back to cross-vocab did-you-mean.
const local = await resolveLocalElement(sql, q.qname, profile);
if (local.kind === "single") {
const children = await getChildren(sql, local.typeSym.id, profile);
return formatChildrenReport(null, local.typeSym, children, profile, {
resolvedFromLocal: local.first,
});
}
if (local.kind === "ambiguous") {
return formatLocalElementAmbiguous(q.qname, local.locals);
}
const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile);
return formatNotFound(
`children for ${q.qname.localName} in namespace ${q.qname.namespace}`,
Expand All @@ -287,6 +314,16 @@ export async function runOoxmlTool(
}
}
if (!typeSym) {
const local = await resolveLocalElement(sql, q.qname, profile);
if (local.kind === "single") {
const attrs = await getAttributes(sql, local.typeSym.id, profile);
return formatAttributesReport(null, local.typeSym, attrs, profile, {
resolvedFromLocal: local.first,
});
}
if (local.kind === "ambiguous") {
return formatLocalElementAmbiguous(q.qname, local.locals);
}
const alts = await findLocalNameAcrossNamespaces(sql, q.qname.localName, profile);
return formatNotFound(
`attributes for ${q.qname.localName} in namespace ${q.qname.namespace}`,
Expand Down Expand Up @@ -414,13 +451,23 @@ function formatChildrenReport(
type: SymbolHit,
children: ChildEdge[],
profile: string,
opts: { resolvedFromLocal?: LocalElementHit } = {},
): string {
const lines: string[] = [];
const heading = element
? `Children of ${element.localName} (via type ${type.localName})`
: `Children of ${type.localName}`;
const local = opts.resolvedFromLocal;
const heading = local
? `Children of ${local.localName} (resolved via local element in ${local.parentKind} ${local.parentLocalName}, type ${type.localName})`
: element
? `Children of ${element.localName} (via type ${type.localName})`
: `Children of ${type.localName}`;
lines.push(`## ${heading}`);
lines.push("");
if (local) {
lines.push(
`_\`${local.localName}\` has no top-level qname; it's a local element declared in ${local.parentKind} \`${local.parentLocalName}\`. Children come from its declared type \`${type.localName}\`._`,
);
lines.push("");
}
lines.push(`- profile: ${profile}`);
lines.push(`- type vocabulary: ${type.vocabularyId}`);
lines.push(`- type namespace: ${type.namespaceUri}`);
Expand Down Expand Up @@ -454,13 +501,23 @@ function formatAttributesReport(
type: SymbolHit,
attrs: AttrEntry[],
profile: string,
opts: { resolvedFromLocal?: LocalElementHit } = {},
): string {
const lines: string[] = [];
const heading = element
? `Attributes of ${element.localName} (via type ${type.localName})`
: `Attributes of ${type.localName}`;
const local = opts.resolvedFromLocal;
const heading = local
? `Attributes of ${local.localName} (resolved via local element in ${local.parentKind} ${local.parentLocalName}, type ${type.localName})`
: element
? `Attributes of ${element.localName} (via type ${type.localName})`
: `Attributes of ${type.localName}`;
lines.push(`## ${heading}`);
lines.push("");
if (local) {
lines.push(
`_\`${local.localName}\` has no top-level qname; it's a local element declared in ${local.parentKind} \`${local.parentLocalName}\`. Attributes come from its declared type \`${type.localName}\`._`,
);
lines.push("");
}
lines.push(`- profile: ${profile}`);
lines.push(`- type vocabulary: ${type.vocabularyId}`);
if (type.sourceName) lines.push(`- source: ${type.sourceName}`);
Expand Down Expand Up @@ -664,3 +721,101 @@ function formatPackagePartNotFound(
);
return lines.join("\n");
}

// --- Local element resolution ------------------------------------------

/** Outcome of `resolveLocalElement`, discriminated on `kind`. */
type LocalResolution =
// Every local declaration shares one non-null type_ref and that ref resolved
// to `typeSym`. `first` is `locals[0]`, kept for parent-context display.
| { kind: "single"; typeSym: SymbolHit; first: LocalElementHit; locals: LocalElementHit[] }
// Local declarations exist but no single type could be chosen: the type_refs
// differ, at least one is null, or the shared ref did not resolve.
| { kind: "ambiguous"; locals: LocalElementHit[] }
// No local declaration matched; the caller falls back to its not-found path.
| { kind: "none" };

/**
 * Resolve a qname that missed the top-level lookup by consulting inline
 * (local) element declarations in the same namespace.
 *
 * Outcomes:
 *  - no local declarations → `none`; the caller continues to its
 *    cross-vocabulary did-you-mean path.
 *  - every declaration carries the same non-null type_ref, and that ref
 *    resolves to a type symbol → `single`. The first declaration is kept
 *    so the caller can show the owning parent.
 *  - anything else (differing type_refs, a null type_ref anywhere, or a
 *    shared ref that does not resolve) → `ambiguous`. Nulls are never
 *    filtered out: an inline-typed declaration has its own content model
 *    that a named type symbol cannot stand in for, so we refuse to guess.
 */
async function resolveLocalElement(
  sql: Sql,
  qname: { namespace: string; localName: string },
  profile: string,
): Promise<LocalResolution> {
  const declarations = await findLocalElementsInNamespace(
    sql,
    qname.localName,
    qname.namespace,
    profile,
  );

  const head = declarations[0];
  if (head === undefined) {
    return { kind: "none" };
  }

  const unresolved: LocalResolution = { kind: "ambiguous", locals: declarations };

  // A missing type_ref on the first hit already rules out unanimity on a
  // named type, so there is nothing to look up.
  const sharedRef = head.typeRef;
  if (!sharedRef) {
    return unresolved;
  }

  // Any declaration that disagrees (including a null type_ref) makes the
  // name ambiguous.
  const unanimous = declarations.every((decl) => decl.typeRef === sharedRef);
  if (!unanimous) {
    return unresolved;
  }

  // All declarations agree; the ref may still be dangling, in which case
  // every declaration is surfaced for the caller to pick from.
  const typeSym = await lookupSymbolByTypeRef(sql, sharedRef, profile);
  if (!typeSym) {
    return unresolved;
  }
  return { kind: "single", typeSym, first: head, locals: declarations };
}

/**
 * Render the markdown report shown when a local element name cannot be
 * resolved to a single type.
 *
 * Callers reach this for three distinct reasons, and the explanatory
 * sentence is chosen to match the hits actually passed in, so the report
 * never claims "multiple places with different types" when that is untrue:
 *  - the declarations carry differing type_refs (including a mix of null
 *    and non-null);
 *  - none of the declarations has a type_ref (inline/anonymous types);
 *  - all declarations share one type_ref, which is only sent here when
 *    that ref failed to resolve to a type symbol.
 *
 * @param qname - The namespace and local name that was looked up.
 * @param locals - Every local declaration found for that name.
 * @returns Markdown with a heading, an explanation, a table of owners and
 *   their type_refs, and a hint on how to disambiguate.
 */
function formatLocalElementAmbiguous(
  qname: { namespace: string; localName: string },
  locals: LocalElementHit[],
): string {
  const distinctRefs = new Set(locals.map((l) => l.typeRef));
  const soleRef = distinctRefs.size === 1 ? locals[0]?.typeRef : undefined;

  let explanation: string;
  if (distinctRefs.size !== 1) {
    // Genuinely conflicting declarations (or, defensively, an empty list).
    explanation = `\`${qname.localName}\` is declared inline in multiple places with different types; no single answer to return.`;
  } else if (!soleRef) {
    // No declaration names a type, so there is no type symbol to follow.
    explanation =
      locals.length === 1
        ? `\`${qname.localName}\` is declared inline in one place without a named type_ref (its type is defined inline), so there is no type symbol to resolve.`
        : `\`${qname.localName}\` is declared inline in ${locals.length} places, none with a named type_ref (each defines its type inline); no single answer to return.`;
  } else {
    // Declarations agree on a type_ref; being here means it did not resolve.
    explanation = `Every inline declaration of \`${qname.localName}\` uses type_ref \`${soleRef}\`, but that type did not resolve to a known symbol; no single answer to return.`;
  }

  const lines: string[] = [];
  lines.push(`## Ambiguous local element \`${qname.localName}\` in namespace ${qname.namespace}`);
  lines.push("");
  lines.push(explanation);
  lines.push("");
  lines.push("| owner | owner kind | type_ref |");
  lines.push("| --- | --- | --- |");
  for (const l of locals) {
    lines.push(`| \`${l.parentLocalName}\` | ${l.parentKind} | ${l.typeRef ?? "_(none)_"} |`);
  }
  lines.push("");
  lines.push(
    "Resolve the parent owner (e.g. `ooxml_attributes` on the group/complexType) or pass the desired type directly.",
  );
  return lines.join("\n");
}

/**
 * Render the markdown report for an element that exists only as a local
 * (inline) declaration and resolved unambiguously.
 *
 * Precondition: callers get here only when `resolveLocalElement` returned
 * `single`, so every entry in `locals` shares `first.typeRef`. That is why
 * the "also declared" list can say "with the same type" without checking;
 * disagreeing declarations are rendered by `formatLocalElementAmbiguous`.
 *
 * @param qname - The looked-up qname; only its namespace is printed.
 * @param first - The declaration used for the heading and parent context.
 * @param locals - All declarations; entries after the first are listed as
 *   additional contexts.
 * @param profile - Profile name echoed in the report.
 * @returns The report as a newline-joined markdown string.
 */
function formatLocalElementReport(
  qname: { namespace: string; localName: string },
  first: LocalElementHit,
  locals: LocalElementHit[],
  profile: string,
): string {
  const { localName, parentKind, parentLocalName, vocabularyId, typeRef } = first;

  const note = `_\`${localName}\` has no top-level qname in this namespace. It's declared inline inside ${parentKind} \`${parentLocalName}\`. Call \`ooxml_attributes\` or \`ooxml_children\` with the same qname to follow its type._`;

  const facts = [
    `- profile: ${profile}`,
    `- namespace: ${qname.namespace}`,
    `- vocabulary: ${vocabularyId}`,
  ];
  if (typeRef) {
    facts.push(`- type_ref: ${typeRef}`);
  }

  // Everything after the first hit is an additional owning context.
  const others = locals.slice(1);
  const alsoDeclared =
    others.length > 0
      ? [
          `Also declared in ${others.length} other local context(s) with the same type:`,
          ...others.map((other) => `- ${other.parentKind} \`${other.parentLocalName}\``),
        ]
      : [];

  return [`## Local element: ${localName}`, "", note, "", ...facts, "", ...alsoDeclared].join(
    "\n",
  );
}
9 changes: 9 additions & 0 deletions tests/ingest-xsd/fixtures/main.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@
<xsd:element name="shared" type="xsd:string"/>
</xsd:sequence>
</xsd:complexType>
<!-- Mirrors WML's EG_RPrBase / w:cs case: a local element declared inside
a group, typed to a complexType. ooxml_attributes for this element's
qname should resolve through the group to the type, returning the
type's attributes. The children and element lookups use the same
local-element fallback when their top-level lookup misses. -->
<xsd:group name="EG_LocalCase">
<xsd:choice>
<xsd:element name="local_para" type="CT_Para"/>
</xsd:choice>
</xsd:group>
<!-- Mirrors WML's *Change types: a complexContent/restriction that
carries no attribute redeclarations should still inherit the base's
attribute uses, not silently drop them. -->
Expand Down
Loading