diff --git a/packages/super-editor/src/editors/v1/core/helpers/getMarksFromSelection.test.js b/packages/super-editor/src/editors/v1/core/helpers/getMarksFromSelection.test.js index e75201a812..42e3860118 100644 --- a/packages/super-editor/src/editors/v1/core/helpers/getMarksFromSelection.test.js +++ b/packages/super-editor/src/editors/v1/core/helpers/getMarksFromSelection.test.js @@ -451,7 +451,8 @@ describe('getMarksFromSelection', () => { const result = getSelectionFormattingState(cursorState); - expect(result.inlineRunProperties).toEqual({ bold: true, boldCs: true }); + // SD-2912: `boldCs` is no longer auto-propagated from the bold mark. + expect(result.inlineRunProperties).toEqual({ bold: true }); expect(result.inlineMarks.some((mark) => mark.type.name === 'bold')).toBe(true); expect(result.resolvedMarks.some((mark) => mark.type.name === 'bold')).toBe(true); }); diff --git a/packages/super-editor/src/editors/v1/core/helpers/syncParagraphRunProperties.test.js b/packages/super-editor/src/editors/v1/core/helpers/syncParagraphRunProperties.test.js index 5659a6b993..a45633c86f 100644 --- a/packages/super-editor/src/editors/v1/core/helpers/syncParagraphRunProperties.test.js +++ b/packages/super-editor/src/editors/v1/core/helpers/syncParagraphRunProperties.test.js @@ -50,7 +50,8 @@ describe('syncParagraphRunProperties', () => { italic: true, styleId: 'Heading1Char', bold: true, - boldCs: true, + // SD-2912: `boldCs` is no longer auto-propagated from the bold mark — see + // `decodeRPrFromMarks` in super-converter/styles.js. }); }); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/exporter.js b/packages/super-editor/src/editors/v1/core/super-converter/exporter.js index ae6b381fae..ebcbdde124 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/exporter.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/exporter.js @@ -106,6 +106,42 @@ export const ensureSectionLayoutDefaults = (sectPr, converter) => { return sectPr; }; +/** + * Walk an XML JSON tree in place and normalize every `` element's + * numeric attributes to integer twips. + * + * ECMA-376 §17.6.11 requires `ST_TwipsMeasure` values to be non-negative whole + * numbers when expressed as raw twips, but some authoring pipelines emit + * float-valued twips like `w:top="168.160400390625"` that strict consumers + * reject. SuperDoc preserves those floats verbatim on the paragraph-level + * sectPr passthrough path. This pass is the single normalization point — + * applied once at the export root — so we produce schema-valid output + * regardless of which path the pgMar values reached the tree through + * (body sectPr → `pageMargins` → `inchesToTwips`, or paragraph-level + * passthrough sectPr that preserved source attrs). + * + * Idempotent. Mutates the tree in place; returns nothing. SD-2912. + * + * @param {{ name?: string, attributes?: Record, elements?: Array } | null | undefined} node + * @returns {void} + */ +export const normalizePgMarTwipsInTree = (node) => { + if (!node || typeof node !== 'object') return; + if (node.name === 'w:pgMar' && node.attributes && typeof node.attributes === 'object') { + for (const key of Object.keys(node.attributes)) { + const value = node.attributes[key]; + if (value == null) continue; + const num = Number(value); + if (Number.isFinite(num) && !Number.isInteger(num)) { + node.attributes[key] = String(Math.round(num)); + } + } + } + if (Array.isArray(node.elements)) { + for (const child of node.elements) normalizePgMarTwipsInTree(child); + } +}; + export const isLineBreakOnlyRun = (node) => { if (!node) return false; if (node.type === 'lineBreak' || node.type === 'hardBreak') return true; @@ -402,6 +438,12 @@ function translateDocumentNode(params) { attributes, }; + // SD-2912: normalize every in the final tree to integer twips, + // catching both the body sectPr path (already integer-correct via + // inchesToTwips) and the paragraph-level passthrough sectPr path that + // preserves source attrs verbatim. + normalizePgMarTwipsInTree(node); + return [node, params]; } diff --git a/packages/super-editor/src/editors/v1/core/super-converter/exporter.pgmar-normalize.test.js b/packages/super-editor/src/editors/v1/core/super-converter/exporter.pgmar-normalize.test.js new file mode 100644 index 0000000000..5649c932d1 --- /dev/null +++ b/packages/super-editor/src/editors/v1/core/super-converter/exporter.pgmar-normalize.test.js @@ -0,0 +1,121 @@ +import { describe, it, expect } from 'vitest'; +import { normalizePgMarTwipsInTree } from './exporter.js'; + +// SD-2912: attributes must be integer twips per ECMA-376 §17.6.11 +// (ST_TwipsMeasure). Some documents carry float-valued twips like +// `w:top="168.160400390625"` that pass through the import → export pipeline +// verbatim on paragraph-level sectPr passthrough; strict consumers reject the +// result. This helper is the single normalization point: it walks the export +// XML JSON tree and rounds every numeric pgMar attribute to an integer. + +describe('normalizePgMarTwipsInTree', () => { + it('does not throw on undefined input', () => { + expect(() => normalizePgMarTwipsInTree(undefined)).not.toThrow(); + }); + + it('does not throw on null input', () => { + expect(() => normalizePgMarTwipsInTree(null)).not.toThrow(); + }); + + it('leaves a tree without any w:pgMar element unchanged', () => { + const tree = { + name: 'w:document', + elements: [ + { name: 'w:body', elements: [{ name: 'w:p', elements: [] }] }, + ], + }; + const before = JSON.stringify(tree); + normalizePgMarTwipsInTree(tree); + expect(JSON.stringify(tree)).toBe(before); + }); + + it('rounds a single float pgMar attribute to an integer twips value', () => { + const tree = { name: 'w:pgMar', attributes: { 'w:top': '168.160400390625' } }; + normalizePgMarTwipsInTree(tree); + expect(tree.attributes['w:top']).toBe('168'); + }); + + it('rounds every numeric pgMar attribute, leaves already-integer values exact', () => { + const tree = { + name: 'w:pgMar', + attributes: { + 'w:top': '168.160400390625', + 'w:bottom': '146.0200023651123', + 'w:left': '352.31998443603516', + 'w:right': '663.9990234375', + 'w:gutter': '0', + 'w:header': '720', + }, + }; + normalizePgMarTwipsInTree(tree); + expect(tree.attributes).toEqual({ + 'w:top': '168', + 'w:bottom': '146', + 'w:left': '352', + 'w:right': '664', + 'w:gutter': '0', + 'w:header': '720', + }); + }); + + it('walks into nested elements and normalizes pgMar attrs at any depth', () => { + const tree = { + name: 'w:document', + elements: [ + { + name: 'w:body', + elements: [ + { + name: 'w:p', + elements: [ + { + name: 'w:pPr', + elements: [ + { + name: 'w:sectPr', + elements: [{ name: 'w:pgMar', attributes: { 'w:top': '146.0200023651123' } }], + }, + ], + }, + ], + }, + { name: 'w:sectPr', elements: [{ name: 'w:pgMar', attributes: { 'w:bottom': '352.31998443603516' } }] }, + ], + }, + ], + }; + normalizePgMarTwipsInTree(tree); + const firstPgMar = tree.elements[0].elements[0].elements[0].elements[0].elements[0]; + const secondPgMar = tree.elements[0].elements[1].elements[0]; + expect(firstPgMar.attributes['w:top']).toBe('146'); + expect(secondPgMar.attributes['w:bottom']).toBe('352'); + }); + + it('is idempotent — re-running on already-normalized values is a no-op', () => { + const tree = { name: 'w:pgMar', attributes: { 'w:top': '168.5' } }; + normalizePgMarTwipsInTree(tree); + const afterFirst = { ...tree.attributes }; + normalizePgMarTwipsInTree(tree); + expect(tree.attributes).toEqual(afterFirst); + expect(tree.attributes['w:top']).toBe('169'); + }); + + it('ignores non-numeric attribute values (defensive against future OOXML extensions)', () => { + const tree = { name: 'w:pgMar', attributes: { 'w:top': '168', 'w:custom': 'auto' } }; + normalizePgMarTwipsInTree(tree); + expect(tree.attributes['w:custom']).toBe('auto'); + }); + + it('does not affect attributes on elements other than w:pgMar', () => { + const tree = { + name: 'w:document', + elements: [ + { name: 'w:pgSz', attributes: { 'w:w': '12240.5', 'w:h': '15840.7' } }, + { name: 'w:pgMar', attributes: { 'w:top': '168.5' } }, + ], + }; + normalizePgMarTwipsInTree(tree); + expect(tree.elements[0].attributes).toEqual({ 'w:w': '12240.5', 'w:h': '15840.7' }); + expect(tree.elements[1].attributes['w:top']).toBe('169'); + }); +}); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/styles.js b/packages/super-editor/src/editors/v1/core/super-converter/styles.js index f9f1a61e96..96a8ff6eef 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/styles.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/styles.js @@ -535,11 +535,14 @@ export function decodeRPrFromMarks(marks) { case 'italic': case 'bold': runProperties[type] = mark.attrs.value !== '0' && mark.attrs.value !== false; - if (type === 'bold') { - runProperties.boldCs = runProperties.bold; - } else if (type === 'italic') { - runProperties.italicCs = runProperties.italic; - } + // SD-2912: do NOT auto-propagate `boldCs` / `italicCs` from the latin + // bold/italic mark. The complex-script companion is an independent OOXML + // property (ECMA-376 §17.3.2). Auto-propagating it injects elements that + // weren't in the source rPr — every run gets a `` regardless of + // whether the original `` contained one. When the source genuinely + // had ``, it round-trips via the run's stored runProperties + // (preserved by the plugin's existing-keys branch — see the matching + // SD-2912 change in `calculateInlineRunPropertiesPlugin.js`). break; case 'underline': { const { underlineType, underlineColor, underlineThemeColor, underlineThemeTint, underlineThemeShade } = @@ -566,12 +569,14 @@ export function decodeRPrFromMarks(marks) { break; } case 'highlight': - if (mark.attrs.color) { - if (mark.attrs.color.toLowerCase() === 'transparent') { - runProperties.highlight = { 'w:val': 'none' }; - } else { - runProperties.highlight = { 'w:val': mark.attrs.color }; - } + if (mark.attrs.color && mark.attrs.color.toLowerCase() !== 'transparent') { + // SD-2912: the "transparent" highlight mark is synthesized from `` shading on import (see encodeMarksFromRPr's shading case). Emitting + // an explicit `` for it injects a new element on every + // run that wasn't in the source. The shading itself round-trips independently via + // its own `` element, so no information is lost by skipping the highlight + // emit for the transparent case. + runProperties.highlight = { 'w:val': mark.attrs.color }; } break; case 'link': diff --git a/packages/super-editor/src/editors/v1/core/super-converter/styles.test.js b/packages/super-editor/src/editors/v1/core/super-converter/styles.test.js index b4f246cc2d..47a663459c 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/styles.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/styles.test.js @@ -435,7 +435,10 @@ describe('decodeRPrFromMarks', () => { { type: 'strike', attrs: { value: true } }, ]; const rPr = decodeRPrFromMarks(marks); - expect(rPr).toEqual({ bold: true, boldCs: true, italic: true, italicCs: true, strike: true }); + // SD-2912: `boldCs`/`italicCs` are independent OOXML properties and are no longer + // auto-propagated from the latin bold/italic marks. They round-trip via the run's + // stored runProperties when the source rPr actually contained them. + expect(rPr).toEqual({ bold: true, italic: true, strike: true }); }); it('should decode textStyle marks for color and fontSize', () => { @@ -487,15 +490,42 @@ describe('decodeRPrFromMarks', () => { const rPr = decodeRPrFromMarks(marks); expect(rPr).toEqual({ styleId: 'Hyperlink' }); }); + + // SD-2912: redundant default emissions on round-trip. + // OOXML default for bCs/iCs is OFF and for w:highlight is "none" — emitting them + // explicitly is byte noise that wasn't in the source. Word treats absence and explicit + // off as identical, but the export should not introduce elements that weren't there. + + it('does not propagate boldCs when the bold mark is off (SD-2912)', () => { + const marks = [{ type: 'bold', attrs: { value: false } }]; + const rPr = decodeRPrFromMarks(marks); + expect(rPr.bold).toBe(false); + expect(rPr.boldCs).toBeUndefined(); + }); + + it('does not propagate italicCs when the italic mark is off (SD-2912)', () => { + const marks = [{ type: 'italic', attrs: { value: false } }]; + const rPr = decodeRPrFromMarks(marks); + expect(rPr.italic).toBe(false); + expect(rPr.italicCs).toBeUndefined(); + }); + + it('does not emit highlight when the highlight mark is transparent (SD-2912)', () => { + const marks = [{ type: 'highlight', attrs: { color: 'transparent' } }]; + const rPr = decodeRPrFromMarks(marks); + expect(rPr.highlight).toBeUndefined(); + }); }); describe('marks encoding/decoding round-trip', () => { it('should correctly round-trip basic properties', () => { + // SD-2912: `boldCs`/`italicCs` are not represented as PM marks (they are independent + // OOXML properties tracked on the run's stored runProperties). The encode/decode mark + // round-trip therefore lossily drops them at the mark layer; preservation through a + // full DOCX round-trip happens via the run-properties passthrough path. const initialRPr = { bold: true, - boldCs: true, italic: true, - italicCs: true, strike: true, underline: { 'w:val': 'single', 'w:color': 'auto' }, color: { val: 'FF0000' }, diff --git a/packages/super-editor/src/editors/v1/extensions/run/calculateInlineRunPropertiesPlugin.js b/packages/super-editor/src/editors/v1/extensions/run/calculateInlineRunPropertiesPlugin.js index f33a09ac31..3865e50980 100644 --- a/packages/super-editor/src/editors/v1/extensions/run/calculateInlineRunPropertiesPlugin.js +++ b/packages/super-editor/src/editors/v1/extensions/run/calculateInlineRunPropertiesPlugin.js @@ -8,12 +8,17 @@ import { } from '@extensions/paragraph/resolvedPropertiesCache.js'; import { collectChangedRangesThroughTransactions } from '@utils/rangeUtils.js'; +// SD-2912: `boldCs` / `italicCs` are NOT marks. The OOXML companions for complex-script +// bold/italic are independent properties (ECMA-376 §17.3.2.1, §17.3.2.16) carried by the +// run's stored runProperties. Listing them here caused the plugin to overwrite their +// preserved value on every appendTransaction, which combined with the auto-propagation in +// `decodeRPrFromMarks` injected a `` / `` element into every run on round- +// trip even when the source rPr had none. Keeping them out of this set lets the existing- +// runProperties branch in `getInlineRunProperties` preserve them verbatim. const RUN_PROPERTIES_DERIVED_FROM_MARKS = new Set([ 'strike', 'italic', - 'italicCs', 'bold', - 'boldCs', 'underline', 'highlight', 'textTransform', diff --git a/packages/super-editor/src/editors/v1/tests/data/sd-2912-pgmar-roundtrip.docx b/packages/super-editor/src/editors/v1/tests/data/sd-2912-pgmar-roundtrip.docx new file mode 100644 index 0000000000..8876d14845 Binary files /dev/null and b/packages/super-editor/src/editors/v1/tests/data/sd-2912-pgmar-roundtrip.docx differ diff --git a/packages/super-editor/src/editors/v1/tests/import-export/sd-2912-pgmar-roundtrip.test.js b/packages/super-editor/src/editors/v1/tests/import-export/sd-2912-pgmar-roundtrip.test.js new file mode 100644 index 0000000000..1a923697d1 --- /dev/null +++ b/packages/super-editor/src/editors/v1/tests/import-export/sd-2912-pgmar-roundtrip.test.js @@ -0,0 +1,130 @@ +import { describe, it, expect } from 'vitest'; +import { dirname, join } from 'path'; +import { fileURLToPath } from 'node:url'; +import { promises as fs } from 'fs'; +import { Editor } from '@core/Editor.js'; +import DocxZipper from '@core/DocxZipper.js'; +import { initTestEditor } from '../helpers/helpers.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const countOccurrences = (haystack, needle) => { + let n = 0; + let i = 0; + while ((i = haystack.indexOf(needle, i)) !== -1) { + n++; + i += needle.length; + } + return n; +}; + +/** + * Parse every in the given XML string into a list of attribute maps. + * The fixture's pgMar elements are all self-closing single-line tags, so a regex is + * adequate here and avoids pulling in a full XML parser for the assertion. + */ +const extractPgMarAttrs = (xml) => { + const result = []; + const tagRe = /]*?)\/?>/g; + let m; + while ((m = tagRe.exec(xml)) !== null) { + const attrs = {}; + const attrRe = /([\w:]+)="([^"]*)"/g; + let am; + while ((am = attrRe.exec(m[1])) !== null) { + attrs[am[1]] = am[2]; + } + result.push(attrs); + } + return result; +}; + +async function roundTripCounts(fixtureFileName) { + const docxPath = join(__dirname, '../data', fixtureFileName); + const docxBuffer = await fs.readFile(docxPath); + + const inputZipper = new DocxZipper(); + const inputEntries = await inputZipper.getDocxData(docxBuffer, true); + const inputDocXml = inputEntries.find((e) => e.name === 'word/document.xml').content; + + const [docx, media, mediaFiles, fonts] = await Editor.loadXmlData(docxBuffer, true); + const { editor } = await initTestEditor({ content: docx, media, mediaFiles, fonts, isHeadless: true }); + + const exportedBuffer = await editor.exportDocx({ isFinalDoc: false }); + const exportedZipper = new DocxZipper(); + const exportedEntries = await exportedZipper.getDocxData(exportedBuffer, true); + const exportDocXml = exportedEntries.find((e) => e.name === 'word/document.xml').content; + + return { + input: { + bCs: countOccurrences(inputDocXml, ' { + it('does not add `` elements that were not in the source document.xml', async () => { + const { input, output } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + expect(input.bCs).toBe(0); + expect(output.bCs).toBe(0); + }); + + it('does not add `` elements that were not in the source document.xml', async () => { + const { input, output } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + expect(input.iCs).toBe(0); + expect(output.iCs).toBe(0); + }); + + it('does not add `` elements that were not in the source document.xml', async () => { + const { input, output } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + expect(input.highlight).toBe(0); + expect(output.highlight).toBe(0); + }); + + // SD-2912 customer ask: source has float-valued attributes like + // `w:top="168.160400390625"` that are schema-invalid per ECMA-376 §17.6.11 + // (ST_TwipsMeasure must be a non-negative whole number when expressed as + // raw twips). Strict consumers reject the document. On export we must + // normalize every pgMar twips attribute to an integer regardless of which + // path it reached the export tree through (body sectPr → pageMargins → + // inchesToTwips, or paragraph-level passthrough sectPr). + describe('SD-2912 pgMar integer-twips normalization', () => { + it('every attribute in the exported document.xml is an integer twips value', async () => { + const { output } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + expect(output.pgMar.length).toBeGreaterThan(0); + for (const attrs of output.pgMar) { + for (const [key, value] of Object.entries(attrs)) { + const num = Number(value); + expect(Number.isFinite(num), `pgMar attr ${key}="${value}" is not numeric`).toBe(true); + expect(Number.isInteger(num), `pgMar attr ${key}="${value}" is not an integer`).toBe(true); + } + } + }); + + it('the exported document.xml contains no decimal-valued pgMar attribute', async () => { + const { output } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + // Regex sanity check on raw XML — catches any pgMar attr value with a `.` + // followed by a digit, which is the symptom strict consumers reject. + expect(output.raw).not.toMatch(/]*"\d+\.\d/); + }); + + it('source fixture confirmed to carry decimal-valued pgMar attrs (otherwise the assertion is vacuous)', async () => { + const { input } = await roundTripCounts('sd-2912-pgmar-roundtrip.docx'); + const decimalSourceAttrs = input.pgMar.flatMap((attrs) => + Object.entries(attrs).filter(([, v]) => /\d+\.\d/.test(v)), + ); + expect(decimalSourceAttrs.length).toBeGreaterThan(0); + }); + }); +});