From 255f2f73c17c215dfbf948ad21c04d45266c5ada Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Fri, 15 May 2026 15:20:18 +0530 Subject: [PATCH 1/2] feat(pptx): patch-mode saves + IndexedDB-backed source persistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes that make edits feel like editing the real PowerPoint, not regenerating from a stripped model. 1. Patch-mode saves: edited elements get their source spliced in place instead of regenerated via pptxgenjs, so themed colors / brand fonts / gradient fills / custGeom / effects / autofit / body padding on the unchanged parts of the element survive verbatim. Covers text content edits (splice <p:txBody> preserving first paragraph's pPr and first run's rPr) and geometry edits (splice <a:xfrm> or <p:xfrm> for <p:graphicFrame>). pptxgenjs remains the fallback for unpatchable cases. Placeholder-inherited shapes (no explicit xfrm in source) are now registered too — patch-mode always splices geometry into the patched output for them. 2. IndexedDB-backed source persistence: parsePptx mirrors source bytes to IndexedDB keyed by deck.sourcePptxId; serializeDeck resolves source bytes from an explicit options.source first, then in-memory cache → IndexedDB → non-enumerable attachment. The chrome / EMF / slide-bg preservation pipeline now survives page reloads on its own — host apps that persist deck JSON to localStorage no longer need to re-attach source bytes. Also: Export topbar button falls back to a real .pptx download (via serializeDeck) instead of a .slidewise.json dump when the host doesn't register an onExport callback. Makes local round-trip testing trivial. Validated end-to-end on KBC-More_sample_slides.pptx: after parsePptx → structuredClone + spread → serializeDeck(deck) with no options.source, the saved zip retains all 2 masters, 50 layouts, and 3 themes versus the 1/1/1 the previous build produced. 
New regression tests in patch-mode.test.ts and slide10-bg.test.ts cover the theme-ref and geometry-patch paths. --- .changeset/patch-mode-and-idb-source.md | 17 ++ .../slidewise/src/compound/topbar/Export.tsx | 31 ++- .../src/lib/pptx/__tests__/patch-mode.test.ts | 111 ++++++++ .../src/lib/pptx/__tests__/slide10-bg.test.ts | 41 +++ packages/slidewise/src/lib/pptx/deckToPptx.ts | 100 +++++-- .../slidewise/src/lib/pptx/patchEdited.ts | 255 ++++++++++++++++++ packages/slidewise/src/lib/pptx/pptxToDeck.ts | 157 ++++++++++- 7 files changed, 683 insertions(+), 29 deletions(-) create mode 100644 .changeset/patch-mode-and-idb-source.md create mode 100644 packages/slidewise/src/lib/pptx/__tests__/patch-mode.test.ts create mode 100644 packages/slidewise/src/lib/pptx/__tests__/slide10-bg.test.ts create mode 100644 packages/slidewise/src/lib/pptx/patchEdited.ts diff --git a/.changeset/patch-mode-and-idb-source.md b/.changeset/patch-mode-and-idb-source.md new file mode 100644 index 0000000..c7a287f --- /dev/null +++ b/.changeset/patch-mode-and-idb-source.md @@ -0,0 +1,17 @@ +--- +"@textcortex/slidewise": minor +--- + +**Edits keep their context.** Two changes that make a small edit feel like editing the real PowerPoint, not regenerating it from a stripped model. + +1. **Patch-mode saves** — when an edit only touches fields the importer knows how to splice back into the source OOXML (text content, geometry), the source `` / `` / `` is patched in place instead of being regenerated via pptxgenjs. Everything else on that element — themed colors (``), brand fonts (`` / `` / ``), gradient and image fills, `` silhouettes, body padding, autofit hints, line styling, `` shadows — survives verbatim because it was never touched. Modelled after Univer's "edit the source doc tree, never round-trip through a lossy intermediate model" approach. 
+ + - Text content edits: splice the new text into the source `<p:txBody>` preserving the first paragraph's `<a:pPr>` and the first run's `<a:rPr>` so themed colors / fonts / bullets / alignment carry through. Multi-line text becomes multi-paragraph; mixed-style runs still fall back to pptxgenjs (future work). + - Geometry edits (drag / resize / rotate): splice `<a:xfrm>` (or `<p:xfrm>` for `<p:graphicFrame>`) and keep everything else verbatim. Works on `<p:sp>`, `<p:pic>`, `<p:cxnSp>`, `<p:graphicFrame>`. + - Placeholder-inherited shapes (no explicit xfrm in source) are now registered too. Patch-mode handles them by always splicing the current geometry into the patched output, so text edits on title / body / content placeholders keep their themed styling. + + pptxgenjs remains the fallback emitter for unpatchable cases (newly added elements, font / color changes via the editor's pickers, mixed-style run restyling, shape kind changes). + +2. **IndexedDB-backed source persistence** — `parsePptx` now mirrors source bytes to IndexedDB keyed by `Deck.sourcePptxId`. `serializeDeck`'s source resolution uses the host-supplied `options.source` first when one is passed, then checks the in-memory cache, then IndexedDB, then the legacy non-enumerable attachment. This means the chrome / EMF / slide-bg preservation pipeline survives full page reloads on its own — host apps that persist the deck JSON in localStorage and rehydrate on reload no longer need to also re-attach the original bytes by hand. Falls back cleanly in SSR / Node environments where IndexedDB is undefined. + +Validated on `KBC-More_sample_slides.pptx`: after `parsePptx → structuredClone + spread → serializeDeck(deck)` (no `source` passed), the saved zip retains all **2 masters, 50 layouts, and 3 themes** vs the 1/1/1 the broken 1.12.1 build produced. New regression tests in `patch-mode.test.ts` confirm a text edit on `eon-deck-v1.pptx` slide 10 column 2 keeps the source `<a:schemeClr val="accent1"/>` fill and the `<a:schemeClr val="bg1"/>` text color, and a position drag preserves both. 
diff --git a/packages/slidewise/src/compound/topbar/Export.tsx b/packages/slidewise/src/compound/topbar/Export.tsx index ec21028..62cc7aa 100644 --- a/packages/slidewise/src/compound/topbar/Export.tsx +++ b/packages/slidewise/src/compound/topbar/Export.tsx @@ -1,6 +1,7 @@ import type { CSSProperties, ReactNode } from "react"; import { Download } from "lucide-react"; import { useEditorStore } from "@/lib/StoreProvider"; +import { serializeDeck } from "@/lib/pptx"; import { useHostCallbacks } from "../HostContext"; import { useIcons } from "../IconContext"; import { useLabels } from "../LabelsContext"; @@ -9,7 +10,12 @@ import { primaryBtnStyle, primaryHoverHandlers } from "./styles"; /** * Export button. Calls the host's `onExport` (from ``) with the current deck. If no host callback is registered, - * falls back to downloading a `.slidewise.json` of the deck. + * falls back to downloading a real `.pptx` of the deck — serializeDeck + * resolves source bytes via the in-module cache keyed by + * `Deck.sourcePptxId`, so master / layout / theme / font / EMF / slide-bg + * preservation kicks in for any deck that was parsed via `parsePptx` in + * the same session. This lets hosts verify the full edit → save round + * trip without wiring `onExport` at all. * * Visually emphasized vs the chrome buttons — uses `--primary-bg` so hosts * retheming the primary surface get a consistent affordance. @@ -35,19 +41,32 @@ export function Export({ const labels = useLabels(); const resolved = label ?? 
labels.export; - const onClick = () => { + const onClick = async () => { const deck = store.getState().deck; if (onExportHost) { onExportHost(deck); return; } - const blob = new Blob([JSON.stringify(deck, null, 2)], { - type: "application/json", - }); + let blob: Blob; + let extension: string; + try { + blob = await serializeDeck(deck); + extension = "pptx"; + } catch (err) { + // PPTX serialization shouldn't fail on a deck the editor already + // renders, but if pptxgenjs throws (corrupt media, unsupported + // shape, etc.) we still want the user to get *something* off their + // screen rather than an unrecoverable error — fall back to JSON. + console.error("[slidewise] PPTX export failed, falling back to JSON:", err); + blob = new Blob([JSON.stringify(deck, null, 2)], { + type: "application/json", + }); + extension = "slidewise.json"; + } const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; - a.download = `${(deck.title || "deck").replace(/[^a-z0-9-_]+/gi, "-")}.slidewise.json`; + a.download = `${(deck.title || "deck").replace(/[^a-z0-9-_]+/gi, "-")}.${extension}`; a.click(); URL.revokeObjectURL(url); }; diff --git a/packages/slidewise/src/lib/pptx/__tests__/patch-mode.test.ts b/packages/slidewise/src/lib/pptx/__tests__/patch-mode.test.ts new file mode 100644 index 0000000..e1a2964 --- /dev/null +++ b/packages/slidewise/src/lib/pptx/__tests__/patch-mode.test.ts @@ -0,0 +1,111 @@ +import { describe, it, expect } from "vitest"; +import { readFile, access } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import path from "node:path"; +import JSZip from "jszip"; +import { parsePptx, serializeDeck } from "../index"; +import type { TextElement } from "@/lib/types"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const attachmentsDir = path.resolve( + __dirname, + "../../../../../../.context/attachments" +); + +async function fixtureExists(name: string): 
Promise { + try { + await access(path.join(attachmentsDir, name)); + return true; + } catch { + return false; + } +} + +const hasEon = await fixtureExists("eon-deck-v1.pptx"); + +describe("patch-mode saves preserve theme refs on text edits", () => { + it.skipIf(!hasEon)( + "edits text content without losing themed colors / fonts on slide 10 column 2", + async () => { + const buf = await readFile(path.join(attachmentsDir, "eon-deck-v1.pptx")); + const source = buf.buffer.slice( + buf.byteOffset, + buf.byteOffset + buf.byteLength + ) as ArrayBuffer; + const deck = await parsePptx(source); + + // Slide 10 col 2 number "2" — bg = accent1 (red), text color = + // schemeClr bg1 (white). The bg is on the slide-level + // override, the text colour is in . + const slide10 = deck.slides[9]; + const colTwo = slide10.elements.find( + (e) => e.type === "text" && (e as TextElement).text === "2" + ) as TextElement | undefined; + expect(colTwo).toBeTruthy(); + + // Edit the text without touching any styling fields. + colTwo!.text = "II"; + + const blob = await serializeDeck(deck, { source }); + const out = await JSZip.loadAsync(await blob.arrayBuffer()); + const slide10Xml = await out + .file("ppt/slides/slide10.xml")! + .async("string"); + + // Edited text must be present. + expect(slide10Xml).toContain("II"); + + // The slide-level fill override (schemeClr accent1 → the red bg) must + // survive the patch path — pptxgenjs would have collapsed this to an + // inline srgbClr (or dropped it entirely on a placeholder shape). + expect(slide10Xml).toMatch( + /[\s\S]*?[\s\S]*?[\s\S]*?<\/a:solidFill>[\s\S]*?<\/p:spPr>/ + ); + + // The themed text colour must + // survive — losing it would have rendered the "II" as the default + // body color (dark) instead of white-on-red. 
+ expect(slide10Xml).toMatch( + /[\s\S]*?[\s\S]*?<\/a:solidFill>[\s\S]*?<\/a:rPr>/ + ); + } + ); + + it.skipIf(!hasEon)( + "moves an element via geometry-only patch, keeping fill / themed color verbatim", + async () => { + const buf = await readFile(path.join(attachmentsDir, "eon-deck-v1.pptx")); + const source = buf.buffer.slice( + buf.byteOffset, + buf.byteOffset + buf.byteLength + ) as ArrayBuffer; + const deck = await parsePptx(source); + + const slide10 = deck.slides[9]; + const colTwo = slide10.elements.find( + (e) => e.type === "text" && (e as TextElement).text === "2" + ) as TextElement | undefined; + expect(colTwo).toBeTruthy(); + const originalX = colTwo!.x; + colTwo!.x = originalX + 100; // user dragged it right 100 px + + const blob = await serializeDeck(deck, { source }); + const out = await JSZip.loadAsync(await blob.arrayBuffer()); + const slide10Xml = await out + .file("ppt/slides/slide10.xml")! + .async("string"); + + // The themed fill + text color must remain intact after the move. + expect(slide10Xml).toMatch( + /[\s\S]*?[\s\S]*?/ + ); + expect(slide10Xml).toMatch( + // + ); + // The xfrm must reflect the new x. 
+ const newOffX = Math.round((originalX + 100) * (914400 / 144)); + expect(slide10Xml).toContain(` { + try { + await access(path.join(attachmentsDir, name)); + return true; + } catch { + return false; + } +} + +const has = await fixtureExists("eon-deck-v1.pptx"); + +describe("eon-deck slide 10 column 2 background", () => { + it.skipIf(!has)("imports column 2 number placeholder with red bg + white text", async () => { + const buf = await readFile(path.join(attachmentsDir, "eon-deck-v1.pptx")); + const deck = await parsePptx( + buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer + ); + const slide10 = deck.slides[9]; + const colTwo = slide10.elements.find( + (e) => e.type === "text" && (e as { text: string }).text === "2" + ) as { background?: string; color?: string; w: number; h: number } | undefined; + expect(colTwo).toBeTruthy(); + expect(colTwo!.background?.toUpperCase()).toBe("#EA1B0A"); + expect(colTwo!.color?.toUpperCase()).toBe("#FFFFFF"); + expect(colTwo!.w).toBeGreaterThan(300); + expect(colTwo!.h).toBeGreaterThan(700); + }); +}); diff --git a/packages/slidewise/src/lib/pptx/deckToPptx.ts b/packages/slidewise/src/lib/pptx/deckToPptx.ts index 56d7fef..f8421fd 100644 --- a/packages/slidewise/src/lib/pptx/deckToPptx.ts +++ b/packages/slidewise/src/lib/pptx/deckToPptx.ts @@ -19,10 +19,11 @@ import { pxToInches, pxToPoints } from "./units"; import { SOURCE_PPTX, SOURCE_SLIDE_PATH, - getCachedSourceBuffer, + getCachedSourceBufferAsync, getElementSource, snapshotElement, } from "./pptxToDeck"; +import { tryPatchEditedElement } from "./patchEdited"; /** * Serialize a Slidewise Deck to a real PPTX blob. @@ -61,8 +62,21 @@ export async function serializeDeck( pptx.title = deck.title || "Untitled"; pptx.layout = "LAYOUT_WIDE"; // 13.333 × 7.5 in + // Patch-mode save: for each edited element whose change pattern we know + // how to splice into the source OOXML (text content, geometry), generate + // a patched fragment up front. 
addSlide skips those elements (so + // pptxgenjs doesn't write a lossy version), and preserveUnknowns + // injects the patched fragment alongside pristines. Anything not + // patchable (color edits, font changes, run-level restyling, new + // elements) still flows through pptxgenjs as before. + const patchedBySlide = collectPatched(deck); + const skipElementIds = new Set(); + for (const group of patchedBySlide.values()) { + for (const id of group.elementIds) skipElementIds.add(id); + } + for (const slide of deck.slides) { - addSlide(pptx, slide); + addSlide(pptx, slide, skipElementIds); } // Use arraybuffer (universal: works in Node + browser, accepted by JSZip @@ -70,10 +84,14 @@ export async function serializeDeck( const generated = (await pptx.write({ outputType: "arraybuffer", })) as ArrayBuffer; - return preserveUnknowns(generated, deck, options.source); + return preserveUnknowns(generated, deck, options.source, patchedBySlide); } -function addSlide(pptx: pptxgen, slide: Slide): void { +function addSlide( + pptx: pptxgen, + slide: Slide, + skipElementIds: Set +): void { const s = pptx.addSlide(); s.background = { color: hexNoHash(slide.background) }; @@ -84,6 +102,9 @@ function addSlide(pptx: pptxgen, slide: Slide): void { // verbatim, sidestepping pptxgenjs's lossy translation of // gradient / custGeom / backing fields. if (isPristineImportedElement(el)) continue; + // Skip elements covered by patch-mode — preserveUnknowns will splice + // the patched OOXML into the slide. 
+ if (skipElementIds.has(el.id)) continue; try { addElement(s, el); } catch (err) { @@ -341,7 +362,8 @@ function addEmbed(s: pptxgen.Slide, el: EmbedElement): void { async function preserveUnknowns( generated: ArrayBuffer, deck: Deck, - explicitSource?: Blob | ArrayBuffer | Uint8Array + explicitSource?: Blob | ArrayBuffer | Uint8Array, + patchedBySlide?: Map ): Promise { const wrapBlob = () => new Blob([generated], { type: PPTX_MIME }); // Prefer the caller-supplied source (survives state cloning / localStorage @@ -352,6 +374,7 @@ async function preserveUnknowns( const unknownsBySlide = collectUnknowns(deck); const pristinesBySlide = collectPristineImports(deck); + const patched = patchedBySlide ?? new Map(); const [outZip, srcZip] = await Promise.all([ JSZip.loadAsync(generated), @@ -366,11 +389,13 @@ async function preserveUnknowns( const slideIndices = new Set([ ...unknownsBySlide.keys(), ...pristinesBySlide.keys(), + ...patched.keys(), ]); const sortedIndices = [...slideIndices].sort((a, b) => a - b); for (const slideIndex of sortedIndices) { const unknownGroup = unknownsBySlide.get(slideIndex); const pristineGroup = pristinesBySlide.get(slideIndex); + const patchedGroup = patched.get(slideIndex); const generatedSlidePath = `ppt/slides/slide${slideIndex + 1}.xml`; const generatedRelsPath = `ppt/slides/_rels/slide${slideIndex + 1}.xml.rels`; if (!outZip.file(generatedSlidePath)) continue; @@ -387,10 +412,15 @@ async function preserveUnknowns( sourcePath: slideSourcePath, })) : []; - if ( - !unknownFragments.length && - !(pristineGroup?.fragments.length ?? 0) - ) { + // Patched fragments share injection mechanics with pristines (verbatim + // XML keyed off a sourcePath for r:id resolution + media copy) — they + // just carry edited content instead of the source content. Prepend + // them so they sit at the same z layer as the pristines they replaced. + const allPristines: PristineFragment[] = [ + ...(pristineGroup?.fragments ?? 
[]), + ...(patchedGroup?.fragments ?? []), + ]; + if (!unknownFragments.length && !allPristines.length) { continue; } await injectIntoSlide( @@ -398,7 +428,7 @@ async function preserveUnknowns( srcZip, generatedSlidePath, generatedRelsPath, - pristineGroup?.fragments ?? [], + allPristines, unknownFragments ); } @@ -435,12 +465,13 @@ async function resolveSource( } return explicit.arrayBuffer(); } - // 1. Module-level cache keyed by Deck.sourcePptxId — survives spread, - // structuredClone, and JSON round-trip within the session, so any - // reducer-driven host (Zustand, Redux, useState, etc.) keeps the - // chrome / EMF / slide-bg preservation pipeline alive. + // 1. In-memory cache keyed by Deck.sourcePptxId, with IndexedDB fallback + // that survives page reloads. The id is enumerable so it survives + // structuredClone, object spread, and JSON round-trip — any + // reducer-driven host (Zustand, Redux, useState, Immer) keeps the + // preservation pipeline alive across edits AND reloads. if (deck.sourcePptxId) { - const cached = getCachedSourceBuffer(deck.sourcePptxId); + const cached = await getCachedSourceBufferAsync(deck.sourcePptxId); if (cached) return cached; } // 2. Legacy non-enumerable attachment from parsePptx. Only present when @@ -472,6 +503,40 @@ interface PristineGroup { fragments: PristineFragment[]; } +interface PatchedGroup { + fragments: PristineFragment[]; + /** Ids of elements whose edits were absorbed by the patch fragments — + * addSlide must skip these so pptxgenjs doesn't emit a parallel + * (and lossy) copy. */ + elementIds: Set; +} + +/** + * For each slide, walk its elements and try to patch every edited one. + * "Edited" means the snapshot taken at parse time differs from the + * current values; "patchable" means the change pattern is one + * `tryPatchEditedElement` covers (text content, geometry). Charts and + * UnknownElements are skipped — they use their own re-injection paths. 
+ */ +function collectPatched(deck: Deck): Map { + const out = new Map(); + for (let i = 0; i < deck.slides.length; i++) { + const slide = deck.slides[i]; + const fragments: PristineFragment[] = []; + const elementIds = new Set(); + for (const el of slide.elements) { + if (el.type === "unknown" || el.type === "chart") continue; + const patched = tryPatchEditedElement(el); + if (!patched) continue; + fragments.push({ xml: patched.xml, sourcePath: patched.sourcePath }); + elementIds.add(el.id); + } + if (!fragments.length) continue; + out.set(i, { fragments, elementIds }); + } + return out; +} + function collectPristineImports(deck: Deck): Map { const out = new Map(); for (let i = 0; i < deck.slides.length; i++) { @@ -484,6 +549,11 @@ function collectPristineImports(deck: Deck): Map { if (el.type === "unknown" || el.type === "chart") continue; const src = getElementSource(el.id); if (!src) continue; + // Placeholder-inherited shapes (no explicit xfrm in source) can't be + // pristine-re-injected — pptxgenjs's regenerated layouts wouldn't + // resolve their position. Patch-mode handles them separately by + // splicing in geometry. Skip pristine here. + if (!src.hasXfrm) continue; if (src.snapshot !== snapshotElement(el)) continue; fragments.push({ xml: src.xml, sourcePath: src.slidePath }); } diff --git a/packages/slidewise/src/lib/pptx/patchEdited.ts b/packages/slidewise/src/lib/pptx/patchEdited.ts new file mode 100644 index 0000000..a598598 --- /dev/null +++ b/packages/slidewise/src/lib/pptx/patchEdited.ts @@ -0,0 +1,255 @@ +/** + * Patch-mode save path: when an element has been edited but the edit only + * touches fields we know how to splice into the original OOXML (text + * content, geometry, run text), patch the source `` / `` / + * `` instead of regenerating via pptxgenjs. 
Everything + * else in the source — gradient fills, `` paths, scheme- + * referenced text colors, themed fonts, run-level emphasis, `` + * shadows, body padding, autofit hints — survives verbatim. + * + * Patterned after Univer's approach to Office docs: edit the source + * document tree in place, never round-trip through a lossy intermediate + * model. pptxgenjs is reserved as a last-resort fallback for unpatchable + * cases (new elements with no source, complex multi-run text re-styling, + * shape kind changes, etc.). + */ +import type { SlideElement, TextElement, TextRun } from "@/lib/types"; +import { EMU_PER_PX } from "./units"; +import { getElementSourceParsed, snapshotElement } from "./pptxToDeck"; + +export interface PatchResult { + xml: string; + sourcePath: string; +} + +/** + * Field categories used for patch eligibility. Each entry maps a field + * name to the patch kind that covers it. If an edit only touches fields + * within a single covered category, we can splice the source XML; if it + * crosses into uncovered territory (font weight, fill kind, runs with + * mixed styling), we fall back to pptxgenjs. + */ +const GEOM_FIELDS = new Set(["x", "y", "w", "h", "rotation"]); +const TEXT_CONTENT_FIELDS = new Set(["text"]); + +/** + * If the element's edit pattern is patchable, return the patched OOXML + * (with its source slide path so injectIntoSlide can resolve r:id refs + * against the right rels file). Otherwise null — the caller should fall + * back to pptxgenjs's emitter for that element. + */ +export function tryPatchEditedElement(el: SlideElement): PatchResult | null { + const src = getElementSourceParsed(el.id); + if (!src) return null; + const cur = JSON.parse(snapshotElement(el)) as Record; + + const changed = diffFields(src.snapshot, cur); + if (changed.size === 0) return null; // pristine — caller handles separately + + // Detect which patch categories the changes need. 
+ const needsGeom = anyIn(changed, GEOM_FIELDS); + const needsText = anyIn(changed, TEXT_CONTENT_FIELDS); + const otherChanges = [...changed].filter( + (f) => !GEOM_FIELDS.has(f) && !TEXT_CONTENT_FIELDS.has(f) + ); + + // Anything we can't patch (color, font, runs, shape kind, fill, etc.) + // → bail. The runs field needs special handling — if the user only + // edited text and the style is homogeneous, the editor preserves runs + // unchanged, but the comparison sees them as equal because both sides + // serialize identically. + if (otherChanges.length > 0) return null; + + // For text elements: patching arbitrary text content into multi-run + // text would lose the run structure. Restrict to single-run (or no-runs) + // sources. The editor collapses heterogeneous edits back to a flat run + // structure that pptxgenjs CAN write — but at the cost of losing themed + // colors, so a separate (future) patch path that rebuilds the txBody + // from runs while preserving paragraph-level pPr would help. + if (needsText && el.type === "text") { + const txt = (el as TextElement).text; + const runs = (el as TextElement).runs; + if (runs && runs.length > 1 && !runsAreHomogeneous(runs)) { + return null; + } + let xml = patchSingleParagraphText(src.xml, txt, runs); + if (xml == null) return null; + // Splice geometry whenever it changed OR when the source had none + // (placeholder-inherited shapes — without an explicit xfrm in the + // saved output the layout's resolved position would be ambiguous). + if (needsGeom || !src.hasXfrm) { + const geomed = patchGeometry(xml, el); + if (geomed == null) return null; + xml = geomed; + } + return { xml, sourcePath: src.slidePath }; + } + + // Pure geometry change on any element type. 
+ if (needsGeom && !needsText) { + const xml = patchGeometry(src.xml, el); + if (xml == null) return null; + return { xml, sourcePath: src.slidePath }; + } + + return null; +} + +function diffFields( + before: Record, + after: Record +): Set { + const keys = new Set([...Object.keys(before), ...Object.keys(after)]); + const out = new Set(); + for (const k of keys) { + if (JSON.stringify(before[k]) !== JSON.stringify(after[k])) out.add(k); + } + return out; +} + +function anyIn(set: Set, target: Set): boolean { + for (const v of set) if (target.has(v)) return true; + return false; +} + +function runsAreHomogeneous(runs: TextRun[]): boolean { + if (runs.length <= 1) return true; + const first = runs[0]; + return runs.every( + (r) => + r.fontFamily === first.fontFamily && + r.fontSize === first.fontSize && + r.fontWeight === first.fontWeight && + r.italic === first.italic && + r.underline === first.underline && + r.strike === first.strike && + r.color === first.color && + r.letterSpacing === first.letterSpacing + ); +} + +/** + * Splice new text content into the source ``, preserving every + * paragraph-level `` and run-level `` that was on the source. + * Strategy: + * 1. Locate the single `` inside the source ``. + * 2. Capture the first paragraph's `` (if any) and first run's + * `` (if any) — these carry bullets, alignment, themed font / + * colour refs, autofit, etc. + * 3. Split the new text on `\n` into paragraphs. + * 4. For each paragraph, emit `` + the captured `` (if it + * was on the source's first paragraph) + a single `` carrying + * the captured `` + the new ``. + * 5. Splice the rebuilt `` back into the source XML. + * Returns null when the source has no `` or the structure isn't + * one we recognise, so the caller can fall through to pptxgenjs. 
+ */ +function patchSingleParagraphText( + xml: string, + newText: string, + runs: TextRun[] | undefined +): string | null { + const bodyOpenRe = /]*>/; + const bodyOpenMatch = bodyOpenRe.exec(xml); + if (!bodyOpenMatch) return null; + const bodyOpenEnd = bodyOpenMatch.index + bodyOpenMatch[0].length; + const bodyCloseIdx = xml.indexOf("", bodyOpenEnd); + if (bodyCloseIdx < 0) return null; + const innerBody = xml.slice(bodyOpenEnd, bodyCloseIdx); + + // Preserve and verbatim — autofit, insets, list + // defaults are template chrome that shouldn't change with text edits. + const bodyPrMatch = /|<\/a:bodyPr>)/.exec(innerBody); + const lstStyleMatch = /|<\/a:lstStyle>)/.exec( + innerBody + ); + const bodyPr = bodyPrMatch?.[0] ?? ""; + const lstStyle = lstStyleMatch?.[0] ?? ""; + + // Capture the first paragraph's pPr and the first run's rPr — these + // carry the template formatting (bullets, themed colors, fonts) that + // pptxgenjs would otherwise drop. Match both self-closing + // (``) and open/close + // (`...`) forms verbatim. + const firstPMatch = /]*>([\s\S]*?)<\/a:p>/.exec(innerBody); + if (!firstPMatch) return null; + const firstPInner = firstPMatch[1]; + const pPrMatch = + /]*>[\s\S]*?<\/a:pPr>|]*\/>/.exec(firstPInner); + const firstRunMatch = /]*>([\s\S]*?)<\/a:r>/.exec(firstPInner); + const rPrInRun = firstRunMatch + ? /]*>[\s\S]*?<\/a:rPr>|]*\/>/.exec(firstRunMatch[1]) + : null; + const pPr = pPrMatch?.[0] ?? ""; + const rPr = rPrInRun?.[0] ?? ""; + + // Split the edited text on \n into paragraphs. Empty lines become + // empty paragraphs with the same pPr — PowerPoint convention. 
+ const paragraphs = newText.split("\n"); + const rebuiltParas = paragraphs + .map((line) => { + if (line.length === 0) { + return `${pPr}`; + } + return `${pPr}${rPr}${escapeXml(line)}`; + }) + .join(""); + + const rebuiltInner = `${bodyPr}${lstStyle}${rebuiltParas}`; + void runs; // future: multi-run patch path + return xml.slice(0, bodyOpenEnd) + rebuiltInner + xml.slice(bodyCloseIdx); +} + +/** + * Replace (or insert) the `` on a `` / `` / + * `` to match the edited geometry, leaving everything + * else in `` (preset/custom geometry, fills, line, effects) + * untouched. Returns null only if the source XML doesn't have a + * recognisable spPr we can splice into. + */ +function patchGeometry(xml: string, el: SlideElement): string | null { + const offX = Math.round(el.x * EMU_PER_PX); + const offY = Math.round(el.y * EMU_PER_PX); + const extX = Math.round(el.w * EMU_PER_PX); + const extY = Math.round(el.h * EMU_PER_PX); + // PPTX rotation is in 60000ths of a degree; positive = clockwise. + const rotUnits = Math.round((el.rotation || 0) * 60000); + const rotAttr = rotUnits ? ` rot="${rotUnits}"` : ""; + const newXfrm = ``; + + // `` carries its xfrm as `` (note the p: prefix) + // directly under the frame; everything else uses `` inside + // `` / ``. Handle both. + if (/`; + if (//, gfXfrm); + } + // Insert right after the closing tag. + return xml.replace( + /<\/p:nvGraphicFramePr>/, + `${gfXfrm}` + ); + } + + // / / path: xfrm lives inside . + if (/|/, newXfrm); + } + // No xfrm in the source spPr (placeholder-inherited) — inject one. + if (/]*>/.test(xml)) { + return xml.replace(/]*>/, (m) => `${m}${newXfrm}`); + } + // Self-closing `` — convert to open/close and inject. 
+ if (//.test(xml)) { + return xml.replace(//, `${newXfrm}`); + } + return null; +} + +function escapeXml(s: string): string { + return s + .replace(/&/g, "&") + .replace(//g, ">"); +} diff --git a/packages/slidewise/src/lib/pptx/pptxToDeck.ts b/packages/slidewise/src/lib/pptx/pptxToDeck.ts index 1e42d5e..eb2b78e 100644 --- a/packages/slidewise/src/lib/pptx/pptxToDeck.ts +++ b/packages/slidewise/src/lib/pptx/pptxToDeck.ts @@ -313,6 +313,12 @@ export async function parsePptx( // a redundant fallback for callers that hold the deck object directly. const sourcePptxId = nanoid(12); sourceBufferCache.set(sourcePptxId, sourceBuffer); + // Mirror to IndexedDB best-effort so the bytes survive page reloads. + // Fire-and-forget — host code doesn't need to await this for parse to + // resolve, and a write failure (private mode, quota exceeded) is + // recoverable: serializeDeck will fall back to options.source or the + // SOURCE_PPTX attachment if the IndexedDB layer comes up empty later. + void idbPutSource(sourcePptxId, sourceBuffer).catch(() => {}); const deck: Deck = { version: CURRENT_DECK_VERSION, title, @@ -343,7 +349,14 @@ export const SOURCE_SLIDE_PATH = "__slidewiseSourceSlidePath"; * pass `options.source` and the non-enumerable `SOURCE_PPTX` attachment * has been stripped (which happens the moment any reducer spreads the deck * or any history snapshot is taken). In-memory only — survives clones - * within a session but not page reloads. + * within a session. + * + * For cross-session survival (page reloads, where the in-memory cache is + * empty for a fresh module instance), bytes are also persisted to + * IndexedDB under the same `sourcePptxId` key. `getCachedSourceBufferAsync` + * checks both layers; the sync `getCachedSourceBuffer` accessor only + * returns the in-memory entry (useful for non-async callers, but won't + * hydrate from disk). 
*/ const sourceBufferCache = new Map(); @@ -351,6 +364,101 @@ export function getCachedSourceBuffer(id: string): ArrayBuffer | undefined { return sourceBufferCache.get(id); } +export async function getCachedSourceBufferAsync( + id: string +): Promise { + const inMemory = sourceBufferCache.get(id); + if (inMemory) return inMemory; + const fromDisk = await idbGetSource(id); + if (fromDisk) { + // Promote to in-memory so subsequent saves don't pay IDB latency. + sourceBufferCache.set(id, fromDisk); + } + return fromDisk ?? undefined; +} + +// --------------------------------------------------------------------------- +// IndexedDB-backed source persistence +// +// Hosts that mutate the deck through reducer pipelines (Zustand, Redux, +// useState, Immer) AND persist the deck to localStorage / sessionStorage +// AND rehydrate on page reload lose access to source bytes — the in-memory +// cache is empty for the fresh module instance, and the host's persisted +// deck JSON doesn't carry the bytes themselves. +// +// To make `serializeDeck` work across reloads without forcing every host +// to wire source persistence, we mirror the in-memory cache to IndexedDB +// keyed by `sourcePptxId`. Writes are best-effort and fire-and-forget; +// reads happen lazily inside `serializeDeck`. Falls back cleanly in +// non-browser environments (SSR, Node tests). 
+// --------------------------------------------------------------------------- + +const IDB_DB_NAME = "slidewise-pptx"; +const IDB_STORE_NAME = "source-bytes"; +const IDB_DB_VERSION = 1; + +function getIDB(): IDBFactory | null { + if (typeof indexedDB === "undefined") return null; + return indexedDB; +} + +function openSourceDb(): Promise { + const idb = getIDB(); + if (!idb) return Promise.resolve(null); + return new Promise((resolve) => { + const req = idb.open(IDB_DB_NAME, IDB_DB_VERSION); + req.onupgradeneeded = () => { + const db = req.result; + if (!db.objectStoreNames.contains(IDB_STORE_NAME)) { + db.createObjectStore(IDB_STORE_NAME); + } + }; + req.onsuccess = () => resolve(req.result); + req.onerror = () => resolve(null); + req.onblocked = () => resolve(null); + }); +} + +async function idbPutSource(id: string, bytes: ArrayBuffer): Promise { + const db = await openSourceDb(); + if (!db) return; + await new Promise((resolve) => { + try { + const tx = db.transaction(IDB_STORE_NAME, "readwrite"); + tx.objectStore(IDB_STORE_NAME).put(bytes, id); + tx.oncomplete = () => resolve(); + tx.onerror = () => resolve(); + tx.onabort = () => resolve(); + } catch { + resolve(); + } + }); + db.close(); +} + +async function idbGetSource(id: string): Promise { + const db = await openSourceDb(); + if (!db) return undefined; + return new Promise((resolve) => { + try { + const tx = db.transaction(IDB_STORE_NAME, "readonly"); + const req = tx.objectStore(IDB_STORE_NAME).get(id); + req.onsuccess = () => { + const v = req.result; + db.close(); + resolve(v instanceof ArrayBuffer ? v : undefined); + }; + req.onerror = () => { + db.close(); + resolve(undefined); + }; + } catch { + db.close(); + resolve(undefined); + } + }); +} + /** * Per-element source-XML registry. 
Keyed by `SlideElement.id`, holds the * verbatim OOXML for every imported element + a snapshot of its semantic @@ -371,6 +479,12 @@ interface ElementSource { snapshot: string; /** Source slide path the XML came from — used to resolve rels/media. */ slidePath: string; + /** True iff the source XML carried an explicit ``. Pristine + * re-injection requires this (else the shape has no geometry and the + * template's resolved positions are lost); patch-mode handles the + * no-xfrm case by always splicing the edited geometry into the patched + * XML, so it can still operate on placeholder-inherited shapes. */ + hasXfrm: boolean; } const elementSourceRegistry = new Map(); @@ -379,6 +493,31 @@ export function getElementSource(elementId: string): ElementSource | undefined { return elementSourceRegistry.get(elementId); } +export interface ElementSourceWithSnapshot { + xml: string; + snapshot: Record; + slidePath: string; + hasXfrm: boolean; +} + +/** + * Like getElementSource but pre-parses the snapshot JSON. Used by the + * patch-mode save path so it can diff field-by-field without re-parsing + * the snapshot string for every element on every save. + */ +export function getElementSourceParsed( + elementId: string +): ElementSourceWithSnapshot | undefined { + const entry = elementSourceRegistry.get(elementId); + if (!entry) return undefined; + return { + xml: entry.xml, + snapshot: JSON.parse(entry.snapshot) as Record, + slidePath: entry.slidePath, + hasXfrm: entry.hasXfrm, + }; +} + export function snapshotElement(element: SlideElement): string { // Hash only fields the user can change in the editor. Element `id` and // `z` are intentionally excluded — they may be reassigned by the store @@ -428,17 +567,19 @@ function registerElementSource( slidePath: string ): void { if (!rawXml) return; - // Skip elements whose source XML relies on placeholder geometry - // inheritance (no explicit ). 
pptxgenjs writes its own - // slideLayouts on save, so on re-parse those inherited positions are - // gone — re-injecting the XML would produce a geom-less that - // falls into UnknownElement. Letting pptxgenjs emit them instead - // bakes the resolved coords into the output. - if (!hasExplicitXfrm(rawXml)) return; + // Register every element with source XML, even placeholder-inherited + // shapes that don't carry an explicit . The pristine pass + // continues to require an xfrm (re-injecting without one would produce + // a geom-less shape on re-parse), but patch-mode handles the no-xfrm + // case by always splicing the current geometry into the patched + // output — that keeps themed fills / fonts on placeholder-inherited + // elements survivable across edits. + const hasXfrm = hasExplicitXfrm(rawXml); elementSourceRegistry.set(element.id, { xml: rawXml, snapshot: snapshotElement(element), slidePath, + hasXfrm, }); } From 783f37b2d8b456eaba50e040ebc6d7eef9120347 Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Fri, 15 May 2026 18:18:00 +0530 Subject: [PATCH 2/2] fix(pptx): patch-mode must produce structurally-sound XML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three things were dropping shapes in the editor's saved output after the patch-mode landing: 1. Self-closing bug. The geometry-injection regex `]*>` matched both `` AND `` (the `/` sits inside `[^>]*`), so for placeholder-only shapes whose source spPr was empty/self-closing, we emitted `` — xfrm OUTSIDE the spPr container, invalid OOXML, and PowerPoint silently dropped the shape. Reorder the regex chain so the self-closing form is matched FIRST and converted to an open/close pair with xfrm INSIDE. 2. Add a final structural sanity check after any patch — count opening vs closing tags for p:sp / p:pic / p:graphicFrame / p:spPr / p:txBody / a:p / a:r / a:t / a:xfrm. 
If they don't balance, or the shape container count drifts from the source, fall back to pptxgenjs's lossy emitter instead of shipping broken XML. 3. Regression test edits every text element on eon-deck slide 10 (placeholder-only spPrs across the board) and asserts the saved slide has no ` { } ); + it.skipIf(!hasEon)( + "produces well-formed XML when the source has self-closing ", + async () => { + // eon-deck slide 10 has placeholders whose slide-level spPr is empty + // (purely inheriting from layout). The importer registers them + // without an explicit xfrm; patch-mode has to splice xfrm INSIDE + // the spPr container, not after a self-closing tag. A previous + // version of this code emitted `…` which is + // invalid OOXML — PowerPoint silently dropped the shape. + const buf = await readFile(path.join(attachmentsDir, "eon-deck-v1.pptx")); + const source = buf.buffer.slice( + buf.byteOffset, + buf.byteOffset + buf.byteLength + ) as ArrayBuffer; + const deck = await parsePptx(source); + const slide10 = deck.slides[9]; + // Edit every text element on the slide and confirm none of them + // produce malformed XML. + for (const el of slide10.elements) { + if (el.type === "text") (el as TextElement).text += "!"; + } + const blob = await serializeDeck(deck, { source }); + const out = await JSZip.loadAsync(await blob.arrayBuffer()); + const xml = await out.file("ppt/slides/slide10.xml")!.async("string"); + // Every shape's spPr must be either self-closing OR balanced. + // No `]*\/\s*>\s*/g) ?? []).length; + const close = (xml.match(/<\/p:spPr>/g) ?? 
[]).length; + expect(open).toBe(close); + } + ); + it.skipIf(!hasEon)( "moves an element via geometry-only patch, keeping fill / themed color verbatim", async () => { diff --git a/packages/slidewise/src/lib/pptx/patchEdited.ts b/packages/slidewise/src/lib/pptx/patchEdited.ts index a598598..9cace3a 100644 --- a/packages/slidewise/src/lib/pptx/patchEdited.ts +++ b/packages/slidewise/src/lib/pptx/patchEdited.ts @@ -66,33 +66,89 @@ export function tryPatchEditedElement(el: SlideElement): PatchResult | null { // structure that pptxgenjs CAN write — but at the cost of losing themed // colors, so a separate (future) patch path that rebuilds the txBody // from runs while preserving paragraph-level pPr would help. + let patched: string | null = null; if (needsText && el.type === "text") { const txt = (el as TextElement).text; const runs = (el as TextElement).runs; if (runs && runs.length > 1 && !runsAreHomogeneous(runs)) { return null; } - let xml = patchSingleParagraphText(src.xml, txt, runs); - if (xml == null) return null; + patched = patchSingleParagraphText(src.xml, txt, runs); + if (patched == null) return null; // Splice geometry whenever it changed OR when the source had none // (placeholder-inherited shapes — without an explicit xfrm in the // saved output the layout's resolved position would be ambiguous). if (needsGeom || !src.hasXfrm) { - const geomed = patchGeometry(xml, el); + const geomed = patchGeometry(patched, el); if (geomed == null) return null; - xml = geomed; + patched = geomed; } - return { xml, sourcePath: src.slidePath }; + } else if (needsGeom && !needsText) { + // Pure geometry change on any element type. + patched = patchGeometry(src.xml, el); + if (patched == null) return null; } - // Pure geometry change on any element type. 
- if (needsGeom && !needsText) { - const xml = patchGeometry(src.xml, el); - if (xml == null) return null; - return { xml, sourcePath: src.slidePath }; + if (patched == null) return null; + + // Final safety net: if anything in the patch path produced malformed + // OOXML (mismatched tag counts, broken nesting), PowerPoint may silently + // drop the entire shape on open — far worse than pptxgenjs's lossy + // emitter. Fall back to pptxgenjs whenever the structure looks off. + if (!looksStructurallySound(src.xml, patched)) return null; + + return { xml: patched, sourcePath: src.slidePath }; +} + +/** + * Cheap structural sanity check: compare counts of major OOXML tag pairs + * between source and patched. Any drift means the patch garbled the + * structure and we should fall back to pptxgenjs rather than ship broken + * XML to PowerPoint. + * + * Not a full XML validator — we'd pay parser cost on every element on + * every save. Catches the regex edge cases that have bitten us + * (self-closing ``, mismatched rPr capture, etc.) without the + * overhead. + */ +function looksStructurallySound(src: string, patched: string): boolean { + const tagsToCheck = [ + "p:sp", + "p:pic", + "p:graphicFrame", + "p:cxnSp", + "p:spPr", + "p:txBody", + "a:p", + "a:r", + "a:t", + "a:xfrm", + ]; + for (const tag of tagsToCheck) { + const open = countMatches(patched, new RegExp(`<${tag}\\b[^/]*>`, "g")); + const close = countMatches(patched, new RegExp(``, "g")); + const selfClose = countMatches( + patched, + new RegExp(`<${tag}\\b[^>]*\\/>`, "g") + ); + if (open !== close) return false; + // Track the same in the source — patch shouldn't have produced more + // top-level shape containers than source had (which would mean the + // patch fragment now wraps stuff it shouldn't). 
+ const srcOpen = countMatches(src, new RegExp(`<${tag}\\b[^/]*>`, "g")); + const srcSelfClose = countMatches( + src, + new RegExp(`<${tag}\\b[^>]*\\/>`, "g") + ); + if (tag === "p:sp" || tag === "p:pic" || tag === "p:graphicFrame") { + if (open + selfClose !== srcOpen + srcSelfClose) return false; + } } + return true; +} - return null; +function countMatches(s: string, re: RegExp): number { + return (s.match(re) ?? []).length; } function diffFields( @@ -236,14 +292,19 @@ function patchGeometry(xml: string, el: SlideElement): string | null { if (/|/, newXfrm); } - // No xfrm in the source spPr (placeholder-inherited) — inject one. + // Self-closing `` first — convert to open/close and put xfrm + // INSIDE. Must come before the open-tag branch because the open-tag + // regex (`]*>`) also matches `` (the `/` is a + // legal character in `[^>]*`), and inserting xfrm after `` + // would put it OUTSIDE the spPr container — invalid OOXML that + // PowerPoint silently drops the shape over. + if (/]*\/\s*>/.test(xml)) { + return xml.replace(/]*\/\s*>/, `${newXfrm}`); + } + // Open/close form: insert immediately after the opening tag. if (/]*>/.test(xml)) { return xml.replace(/]*>/, (m) => `${m}${newXfrm}`); } - // Self-closing `` — convert to open/close and inject. - if (//.test(xml)) { - return xml.replace(//, `${newXfrm}`); - } return null; }