From 3c4a6a8234efbcc4fb2e4a5b3d249c332159577f Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Wed, 13 May 2026 22:45:30 +0530 Subject: [PATCH 1/2] fix(pptx): preserve masters/layouts/theme/fonts/bg + EMF fallback on save Three stacked regressions caused client decks to lose chunks of content after a single text edit: * pptxgenjs regenerates its own slideMasters/, slideLayouts/, theme/ and never emits fonts/. preserveUnknowns now copies the source's chrome (incl. notesMasters, handoutMasters, tags) into the generated zip, splices source's sldMasterIdLst / notesMasterIdLst / embeddedFontLst into presentation.xml, rewrites presentation.xml.rels + each slide's rels to point at the originals, updates [Content_Types].xml, and copies referenced media (renamed on collision with pptxgenjs's media). Bails when source/output aspect ratios differ so 4:3 sources don't get their chrome stretched onto a 16:9 canvas. * pptxgenjs's slide.background only emits flat-hex solidFill, collapsing theme refs / gradients / image-fills. A new per-slide pass copies the source slide's <p:bg> verbatim (with r:id rewrite for image fills), or strips the flat-hex stand-in when the source inherits from the layout / master. * EMF/WMF decode failures used to return null from parsePic; combined with upstream catches this could wipe every element on the same slide (Dickinson slides 2/3/9). The fallback now returns an UnknownElement so the source <p:pic> is re-injected verbatim and PowerPoint renders the metafile natively. Validated end-to-end on Dickinson_Sample_Slides.pptx (9/9 slides retain content, slide 2's schemeClr tx1 theme bg survives) and eon-deck.pptx (28 layouts, 5 embedded fonts, 3 themes preserved). 
--- .../preserve-chrome-and-emf-fallback.md | 15 + .../__tests__/chrome-preservation.test.ts | 123 +++ packages/slidewise/src/lib/pptx/deckToPptx.ts | 709 +++++++++++++++++- packages/slidewise/src/lib/pptx/pptxToDeck.ts | 14 +- 4 files changed, 853 insertions(+), 8 deletions(-) create mode 100644 .changeset/preserve-chrome-and-emf-fallback.md create mode 100644 packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts diff --git a/.changeset/preserve-chrome-and-emf-fallback.md b/.changeset/preserve-chrome-and-emf-fallback.md new file mode 100644 index 0000000..30f3d15 --- /dev/null +++ b/.changeset/preserve-chrome-and-emf-fallback.md @@ -0,0 +1,15 @@ +--- +"@textcortex/slidewise": minor +--- + +**Stop losing slide masters, layouts, themes, embedded fonts, slide backgrounds, and EMF-bearing slides on save.** + +Three stacked regressions made client decks lose huge chunks of content after a single text edit: + +- pptxgenjs regenerates its own `ppt/slideMasters/`, `ppt/slideLayouts/`, `ppt/theme/`, and never emits `ppt/fonts/`. On save the original chrome was thrown out — taking master-level backgrounds, brand bars, page numbers, footers, theme palettes, and embedded brand fonts with it. `preserveUnknowns` now copies these directories (plus `notesMasters`, `handoutMasters`, `tags`) from the source PPTX into the generated zip, splices the source's `<p:sldMasterIdLst>` / `<p:notesMasterIdLst>` / `<p:embeddedFontLst>` into `presentation.xml`, rewrites `presentation.xml.rels` and each slide's rels to point at the original layouts, updates `[Content_Types].xml`, and copies referenced master/layout media (renamed on collision with pptxgenjs's own media). Bails safely when source and output slide-size aspect ratios differ so 4:3 sources don't get their masters stretched onto a 16:9 canvas. + +- pptxgenjs's `slide.background` only emits a flat-hex `solidFill`, collapsing gradient / image-fill / theme-referenced backgrounds (e.g. `schemeClr tx1` → flat-hex `solidFill`). 
A new per-slide pass copies the source slide's `<p:bg>` element verbatim into the output, rewriting r:id references for image-fill backgrounds and dropping the output's flat-hex stand-in when the source inherits from layout / master. + +- EMF/WMF decode failures used to return `null` from the picture parser. Combined with upstream catches, a single un-decodable metafile could wipe every other element on the same slide (Dickinson sample slides 2, 3, 9 — title + subtitle + logo all gone after one text edit). The fallback now returns an `UnknownElement` so the source `<p:pic>` is re-injected verbatim and the EMF reference survives for PowerPoint to render natively. + +Validated on `Dickinson_Sample_Slides.pptx` (9/9 slides retain content + slide 2's `schemeClr tx1` theme bg survives, vs 5/9 empty slides before) and `eon-deck.pptx` (28 layouts, 5 embedded fonts, and 3 themes preserved, vs 1/0/1 before). diff --git a/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts b/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts new file mode 100644 index 0000000..2ccea47 --- /dev/null +++ b/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts @@ -0,0 +1,123 @@ +import { describe, it, expect } from "vitest"; +import { readFile } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import path from "node:path"; +import JSZip from "jszip"; +import { parsePptx, serializeDeck } from "../index"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const attachmentsDir = path.resolve( + __dirname, + "../../../../../../.context/attachments" +); + +async function loadFixture(name: string): Promise { + const buf = await readFile(path.join(attachmentsDir, name)); + return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer; +} + +async function listZipPaths(buf: ArrayBuffer | Blob): Promise> { + const ab = buf instanceof Blob ? 
await buf.arrayBuffer() : buf; + const zip = await JSZip.loadAsync(ab); + const paths = new Set(); + zip.forEach((p) => paths.add(p)); + return paths; +} + +async function countSlidesWithSpTreeChildren( + buf: Blob +): Promise { + const zip = await JSZip.loadAsync(await buf.arrayBuffer()); + let count = 0; + const slidePaths: string[] = []; + zip.forEach((p) => { + if ( + p.startsWith("ppt/slides/slide") && + p.endsWith(".xml") && + !p.includes("/_rels/") + ) { + slidePaths.push(p); + } + }); + for (const p of slidePaths) { + const xml = await zip.file(p)!.async("string"); + // Anything inside spTree beyond the bookkeeping group counts. + if ( + / { + it("preserves slide masters / layouts / theme / fonts on a 16:9 source (eon-deck)", async () => { + const source = await loadFixture("eon-deck.pptx"); + + const deck = await parsePptx(source); + const blob = await serializeDeck(deck, { source }); + + const outPaths = await listZipPaths(blob); + const srcPaths = await listZipPaths(source); + + // Every master, layout, and font from the source should survive. + const srcLayouts = [...srcPaths].filter( + (p) => + p.startsWith("ppt/slideLayouts/") && + p.endsWith(".xml") && + !p.includes("/_rels/") + ); + const outLayouts = [...outPaths].filter( + (p) => + p.startsWith("ppt/slideLayouts/") && + p.endsWith(".xml") && + !p.includes("/_rels/") + ); + expect(outLayouts.length).toBe(srcLayouts.length); + + const srcFonts = [...srcPaths].filter( + (p) => p.startsWith("ppt/fonts/") && !p.endsWith("/") + ); + const outFonts = [...outPaths].filter( + (p) => p.startsWith("ppt/fonts/") && !p.endsWith("/") + ); + expect(outFonts.length).toBe(srcFonts.length); + for (const f of srcFonts) expect(outFonts).toContain(f); + + // Theme should round-trip. 
+ expect(outPaths.has("ppt/theme/theme1.xml")).toBe(true); + }); + + it("keeps slide content intact when the source has EMF pictures (Dickinson)", async () => { + const source = await loadFixture("Dickinson_Sample_Slides.pptx"); + + const deck = await parsePptx(source); + expect(deck.slides.length).toBe(9); + + // Slides 2, 3, 9 in the source carry EMF logos. After the EMF-decode fix + // they should still ship element content (either re-rendered images or + // UnknownElement placeholders that round-trip verbatim) rather than + // dropping their entire spTree. + for (const slideIndex of [1, 2, 8]) { + expect(deck.slides[slideIndex].elements.length).toBeGreaterThan(0); + } + + const blob = await serializeDeck(deck, { source }); + const nonEmptySlides = await countSlidesWithSpTreeChildren(blob); + // All 9 slides should have visible content after save. + expect(nonEmptySlides).toBe(9); + + // Slide 2's background is `` in the source. After + // save it should still reference the theme color, not collapse to the + // flat `` pptxgenjs would have written. 
+ const zip = await JSZip.loadAsync(await blob.arrayBuffer()); + const slide2 = await zip.file("ppt/slides/slide2.xml")!.async("string"); + expect(slide2).toContain(''); + expect(slide2).not.toContain(''); + }); +}); diff --git a/packages/slidewise/src/lib/pptx/deckToPptx.ts b/packages/slidewise/src/lib/pptx/deckToPptx.ts index c8ce3dc..eda2746 100644 --- a/packages/slidewise/src/lib/pptx/deckToPptx.ts +++ b/packages/slidewise/src/lib/pptx/deckToPptx.ts @@ -343,15 +343,15 @@ async function preserveUnknowns( explicitSource?: Blob | ArrayBuffer | Uint8Array ): Promise { const wrapBlob = () => new Blob([generated], { type: PPTX_MIME }); - const unknownsBySlide = collectUnknowns(deck); - const pristinesBySlide = collectPristineImports(deck); - if (!unknownsBySlide.size && !pristinesBySlide.size) return wrapBlob(); // Prefer the caller-supplied source (survives state cloning / localStorage // rehydrate); fall back to the non-enumerable attachment from parsePptx // for the "parse → serialize" happy path with no state in between. const sourceBuffer = await resolveSource(deck, explicitSource); if (!sourceBuffer) return wrapBlob(); + const unknownsBySlide = collectUnknowns(deck); + const pristinesBySlide = collectPristineImports(deck); + const [outZip, srcZip] = await Promise.all([ JSZip.loadAsync(generated), JSZip.loadAsync(sourceBuffer), @@ -402,6 +402,21 @@ async function preserveUnknowns( ); } + // Replace pptxgenjs's regenerated chrome (slide masters, layouts, theme, + // notes master, embedded fonts) with the source's. Without this, every + // background, brand bar, gradient, embedded font, and footer that lives + // on the master/layout disappears on save. Best-effort: bails when source + // and output slide size don't match so 4:3 sources don't get their + // masters stretched onto a 16:9 canvas. + await preserveDeckChrome(outZip, srcZip, deck, sourceSlidePaths); + + // Per-slide `` preservation. 
pptxgenjs's slide.background only + // emits solid colors, so gradient / image / theme-referenced + // backgrounds collapse to a flat hex through the model path. Replace + // each output slide's `` with the source's verbatim XML when + // available so gradients survive intact. + await preserveSlideBackgrounds(outZip, srcZip, deck, sourceSlidePaths); + // JSZip's blob output preserves the OOXML mime type set by pptxgenjs. return outZip.generateAsync({ type: "blob", mimeType: PPTX_MIME }); } @@ -752,6 +767,694 @@ function normalisePath(target: string, base: string): string { return [...segments, t].filter(Boolean).join("/"); } +// -- Deck chrome preservation ---------------------------------------------- + +/** + * Replace pptxgenjs's regenerated deck chrome (slide masters, layouts, theme, + * notes master, embedded fonts, tags, handout masters) with the originals + * from the source PPTX. Without this, anything that lives on the master or + * layout — backgrounds, brand bars, gradients, page numbers, embedded + * brand fonts — disappears the first time the deck is saved. + * + * Bails safely when the source's slide size doesn't match the output's + * (e.g. a 4:3 source written as 16:9): copying masters drawn at one + * aspect ratio onto slides authored at another would visually misalign + * the chrome. Future work: drive the output slide size from the source. + */ +const CHROME_PREFIXES = [ + "ppt/slideMasters/", + "ppt/slideLayouts/", + "ppt/theme/", + "ppt/fonts/", + "ppt/notesMasters/", + "ppt/handoutMasters/", + "ppt/tags/", +] as const; + +async function preserveDeckChrome( + outZip: JSZip, + srcZip: JSZip, + deck: Deck, + sourceSlidePaths: string[] +): Promise { + if (!(await aspectRatiosMatch(outZip, srcZip))) return; + + // 1. Find every chrome path that exists in the source. + const srcChromePaths = listPaths(srcZip, CHROME_PREFIXES); + if (!srcChromePaths.length) return; + + // 2. 
Remove pptxgenjs's chrome — we're about to overwrite with the source's, + // but pptxgenjs may have left files we don't replace (e.g. its single + // slideLayout1.xml when the source has 28 layouts named slideLayout1-28, + // or stale slideMaster overrides in [Content_Types].xml). + const outChromePaths = listPaths(outZip, CHROME_PREFIXES); + for (const p of outChromePaths) outZip.remove(p); + + // 3. Walk every chrome rels file in srcZip to discover the media payloads + // those masters / layouts / themes reference. These need to come along + // or the chrome XML will dangle on r:id references after the move. + const referencedMedia = await collectChromeMediaRefs(srcZip, srcChromePaths); + + // 4. Copy the chrome files themselves verbatim. JSZip lazily defers the + // actual byte copy until generateAsync, which is cheap. + for (const p of srcChromePaths) { + const f = srcZip.file(p); + if (!f) continue; + outZip.file(p, f.async("uint8array"), { binary: true }); + } + + // 5. Copy media payloads. pptxgenjs writes its own `ppt/media/imageN.*` + // with an unrelated numbering, so we need to rename on collision and + // rewrite the copied chrome rels to point at the renamed target. 
+ const mediaRenames = new Map(); // source full path → out full path + for (const srcMediaPath of referencedMedia) { + const srcFile = srcZip.file(srcMediaPath); + if (!srcFile) continue; + let outMediaPath = srcMediaPath; + if (outZip.file(outMediaPath)) { + const slash = srcMediaPath.lastIndexOf("/"); + const dir = srcMediaPath.slice(0, slash + 1); + const base = srcMediaPath.slice(slash + 1); + let i = 0; + do { + outMediaPath = `${dir}slidewise_chrome_${i}_${base}`; + i++; + } while (outZip.file(outMediaPath)); + } + outZip.file(outMediaPath, srcFile.async("uint8array"), { binary: true }); + if (outMediaPath !== srcMediaPath) { + mediaRenames.set(srcMediaPath, outMediaPath); + } + } + if (mediaRenames.size) { + await rewriteChromeRelsForRenames(outZip, srcChromePaths, mediaRenames); + } + + // 6. [Content_Types].xml: drop the master/layout/theme/notesMaster overrides + // pptxgenjs declared (some of which point at files it never wrote — see + // the slideMaster1..9 overrides emitted with only slideMaster1.xml + // actually on disk) and add overrides for the files we just copied. + // Font extensions need a `` entry so PowerPoint embeds them. + await rewriteContentTypes(outZip, srcChromePaths); + + // 7. presentation.xml.rels: replace pptxgenjs's slideMaster / theme / + // notesMaster rels with the source's mapping. presentation.xml's + // / also get spliced from + // the source so multi-master decks (rare but real) round-trip. + await rewritePresentation(outZip, srcZip); + + // 8. Each slide's rels currently points at pptxgenjs's slideLayout1.xml, + // which we just deleted. Re-point each slide at the original layout + // its source counterpart used. New slides (added in-editor with no + // source path) fall back to the first source layout. + await rewriteSlideLayoutRefs(outZip, srcZip, deck, sourceSlidePaths); +} + +/** + * Replace each output slide's `` element with the source slide's + * `` verbatim. 
This is what keeps gradient / image-fill / theme- + * referenced backgrounds intact — pptxgenjs's slide.background only + * emits flat-hex solid fills, so anything fancier was collapsing on save. + * + * Image-fill backgrounds (``) + * have their r:id rewritten to a fresh slide-rels-scoped rId, with the + * referenced media copied across so the fill resolves. + */ +async function preserveSlideBackgrounds( + outZip: JSZip, + srcZip: JSZip, + deck: Deck, + sourceSlidePaths: string[] +): Promise { + for (let i = 0; i < deck.slides.length; i++) { + const slide = deck.slides[i]; + const sourceSlidePath = + ((slide as unknown as Record)[SOURCE_SLIDE_PATH] as + | string + | undefined) ?? sourceSlidePaths[i]; + if (!sourceSlidePath) continue; + const srcSlideFile = srcZip.file(sourceSlidePath); + if (!srcSlideFile) continue; + const srcXml = await srcSlideFile.async("string"); + const bgFragment = extractBgFragment(srcXml); + if (bgFragment == null) { + // Source slide had no explicit `` — it's inheriting from + // layout / master. Drop pptxgenjs's flat-hex bg so the inheritance + // chain can do its job once the original chrome is back in place. + await stripOutputBg(outZip, i); + continue; + } + await injectSlideBg(outZip, srcZip, i, sourceSlidePath, bgFragment); + } +} + +function extractBgFragment(slideXml: string): string | null { + const cSldOpen = slideXml.indexOf("", cSldOpen); + if (cSldClose < 0) return null; + const scope = slideXml.slice(cSldOpen, cSldClose); + const bgOpen = scope.indexOf("` is legal but expresses "no background"; treat + // as missing so inheritance kicks back in. 
+ const selfClose = /]*\/\s*>/.exec(scope); + if (selfClose && selfClose.index === bgOpen) return null; + const bgClose = scope.indexOf("", bgOpen); + if (bgClose < 0) return null; + return scope.slice(bgOpen, bgClose + "".length); +} + +async function stripOutputBg(outZip: JSZip, slideIndex: number): Promise { + const outPath = `ppt/slides/slide${slideIndex + 1}.xml`; + const file = outZip.file(outPath); + if (!file) return; + const xml = await file.async("string"); + const updated = xml.replace(/|]*\/\s*>/, ""); + if (updated !== xml) outZip.file(outPath, updated); +} + +async function injectSlideBg( + outZip: JSZip, + srcZip: JSZip, + slideIndex: number, + sourceSlidePath: string, + bgFragment: string +): Promise { + const outSlidePath = `ppt/slides/slide${slideIndex + 1}.xml`; + const outRelsPath = `ppt/slides/_rels/slide${slideIndex + 1}.xml.rels`; + const outSlideFile = outZip.file(outSlidePath); + if (!outSlideFile) return; + const outXml = await outSlideFile.async("string"); + + let outRelsXml = + (await outZip.file(outRelsPath)?.async("string")) ?? + `\n`; + + // Rewrite r:embed / r:link references inside the bg fragment so they + // don't collide with rIds pptxgenjs already wrote into this slide's + // rels. Mirrors the rId-rewrite logic in injectIntoSlide but scoped + // to a single fragment + slide rels. + let rewritten = bgFragment; + if (/\br:(embed|link|id)="rId\d+"/.test(bgFragment)) { + const outRels = parseRels(outRelsXml); + let nextRid = + [...outRels.keys()].reduce((max, id) => { + const m = /^rId(\d+)$/.exec(id); + return m ? Math.max(max, Number(m[1])) : max; + }, 0) + 1; + const srcRelsXml = + (await srcZip + .file(relsPathFor(sourceSlidePath)) + ?.async("string")) ?? 
null; + const srcRels = parseRels(srcRelsXml); + const sourceDir = dirOf(sourceSlidePath); + const outDir = dirOf(outSlidePath); + const newRelLines: string[] = []; + const ridMap = new Map(); + rewritten = bgFragment.replace( + /\b(r:[a-zA-Z]+)="(rId\d+)"/g, + (_m, attr: string, srcRid: string) => { + const cached = ridMap.get(srcRid); + if (cached) return `${attr}="${cached}"`; + const srcRel = srcRels.get(srcRid); + if (!srcRel) return `${attr}="${srcRid}"`; + const newRid = `rId${nextRid++}`; + ridMap.set(srcRid, newRid); + let target = srcRel.target; + const isExternal = /^https?:\/\//i.test(target); + const isInternalPart = !isExternal && !target.startsWith("/"); + if (isInternalPart) { + const srcFullTarget = normalisePath(target, sourceDir); + const srcFile = srcZip.file(srcFullTarget); + if (srcFile) { + const newTarget = uniqueTarget(target, outZip, outDir); + const newFullTarget = normalisePath(newTarget, outDir); + outZip.file(newFullTarget, srcFile.async("uint8array"), { + binary: true, + }); + target = newTarget; + } + } + newRelLines.push(buildRelXml(newRid, srcRel.type, target)); + return `${attr}="${newRid}"`; + } + ); + if (newRelLines.length) { + const insertAt = outRelsXml.lastIndexOf(""); + outRelsXml = + insertAt >= 0 + ? outRelsXml.slice(0, insertAt) + + newRelLines.join("") + + outRelsXml.slice(insertAt) + : outRelsXml.replace( + /]*>/, + (m) => `${m}${newRelLines.join("")}` + ); + outZip.file(outRelsPath, outRelsXml); + } + } + + // Replace pptxgenjs's `...` (or self-closing equivalent) with + // the source fragment. When the output has no `` yet, insert + // immediately after `` so it precedes `` per the + // OOXML schema's ordering. 
+ let updated = outXml; + const existingBgRe = /|]*\/\s*>/; + if (existingBgRe.test(outXml)) { + updated = outXml.replace(existingBgRe, rewritten); + } else { + const cSldOpenMatch = /]*>/.exec(outXml); + if (cSldOpenMatch) { + const idx = cSldOpenMatch.index + cSldOpenMatch[0].length; + updated = outXml.slice(0, idx) + rewritten + outXml.slice(idx); + } + } + if (updated !== outXml) outZip.file(outSlidePath, updated); +} + +function listPaths(zip: JSZip, prefixes: readonly string[]): string[] { + const out: string[] = []; + zip.forEach((relPath) => { + for (const prefix of prefixes) { + if (relPath.startsWith(prefix)) { + out.push(relPath); + return; + } + } + }); + return out; +} + +async function collectChromeMediaRefs( + srcZip: JSZip, + chromePaths: string[] +): Promise> { + const refs = new Set(); + for (const p of chromePaths) { + if (!p.endsWith(".rels")) continue; + const xml = await srcZip.file(p)?.async("string"); + if (!xml) continue; + const rels = parseRels(xml); + // The owning XML lives at e.g. `ppt/slideMasters/slideMaster1.xml`, + // its rels at `ppt/slideMasters/_rels/slideMaster1.xml.rels`. Targets + // are relative to the XML's directory. + const xmlPath = p.replace("/_rels/", "/").replace(/\.rels$/, ""); + const xmlDir = dirOf(xmlPath); + for (const { target } of rels.values()) { + if (/^https?:\/\//i.test(target)) continue; + const full = normalisePath(target, xmlDir); + // Pull media but also fonts (sometimes in `ppt/fonts/` already + // captured by chrome prefixes), embeddings, and any other + // chrome-adjacent payload — we err on the side of copying so + // brand-bar logos and embedded font glyphs survive. 
+ if ( + full.startsWith("ppt/media/") || + full.startsWith("ppt/embeddings/") || + full.startsWith("ppt/charts/") + ) { + refs.add(full); + } + } + } + return refs; +} + +async function rewriteChromeRelsForRenames( + outZip: JSZip, + chromePaths: string[], + renames: Map +): Promise { + for (const p of chromePaths) { + if (!p.endsWith(".rels")) continue; + const xml = await outZip.file(p)?.async("string"); + if (!xml) continue; + const xmlPath = p.replace("/_rels/", "/").replace(/\.rels$/, ""); + const xmlDir = dirOf(xmlPath); + let changed = false; + const updated = xml.replace(/Target="([^"]+)"/g, (m, target: string) => { + if (/^https?:\/\//i.test(target)) return m; + const full = normalisePath(target, xmlDir); + const renamed = renames.get(full); + if (!renamed) return m; + changed = true; + return `Target="${relativeTarget(xmlDir, renamed)}"`; + }); + if (changed) outZip.file(p, updated); + } +} + +function relativeTarget(fromDir: string, toPath: string): string { + const fromSegs = fromDir.split("/").filter(Boolean); + const toSegs = toPath.split("/").filter(Boolean); + let i = 0; + while (i < fromSegs.length && i < toSegs.length && fromSegs[i] === toSegs[i]) { + i++; + } + const up = fromSegs.length - i; + const rest = toSegs.slice(i).join("/"); + return up > 0 ? 
`${"../".repeat(up)}${rest}` : rest; +} + +const CONTENT_TYPE_BY_DIR: Record = { + "ppt/slideMasters/": + "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml", + "ppt/slideLayouts/": + "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml", + "ppt/theme/": + "application/vnd.openxmlformats-officedocument.theme+xml", + "ppt/notesMasters/": + "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml", + "ppt/handoutMasters/": + "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml", + "ppt/tags/": + "application/vnd.openxmlformats-officedocument.presentationml.tags+xml", +}; + +async function rewriteContentTypes( + outZip: JSZip, + srcChromePaths: string[] +): Promise { + const file = outZip.file("[Content_Types].xml"); + if (!file) return; + let xml = await file.async("string"); + + // Drop every existing Override under the chrome prefixes — pptxgenjs + // sometimes declares masters / layouts it never wrote, and we're about + // to declare the real set from source. + xml = xml.replace( + //g, + "" + ); + + // Build a fresh set of Override entries for chrome XML files we copied. + const additions: string[] = []; + const seenParts = new Set(); + // Re-scan existing xml to avoid duplicate part declarations. + const existingPartRe = /PartName="\/([^"]+)"/g; + let m: RegExpExecArray | null; + while ((m = existingPartRe.exec(xml))) seenParts.add(m[1]); + + for (const path of srcChromePaths) { + if (path.endsWith(".rels")) continue; + if (!path.endsWith(".xml")) continue; + const dirMatch = Object.keys(CONTENT_TYPE_BY_DIR).find((d) => + path.startsWith(d) + ); + if (!dirMatch) continue; + if (seenParts.has(path)) continue; + additions.push( + `` + ); + seenParts.add(path); + } + + // Embedded fonts: declare the `.fntdata` extension as a Default once. 
+ const hasFonts = srcChromePaths.some((p) => p.startsWith("ppt/fonts/")); + if (hasFonts && !/Extension="fntdata"/i.test(xml)) { + additions.push( + `` + ); + } + + if (!additions.length) { + outZip.file("[Content_Types].xml", xml); + return; + } + + const closeIdx = xml.lastIndexOf(""); + if (closeIdx < 0) return; + const updated = + xml.slice(0, closeIdx) + additions.join("") + xml.slice(closeIdx); + outZip.file("[Content_Types].xml", updated); +} + +const REL_TYPE_SLIDE_MASTER = + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideMaster"; +const REL_TYPE_THEME = + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"; +const REL_TYPE_NOTES_MASTER = + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster"; +const REL_TYPE_HANDOUT_MASTER = + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/handoutMaster"; + +async function rewritePresentation( + outZip: JSZip, + srcZip: JSZip +): Promise { + const outRelsFile = outZip.file("ppt/_rels/presentation.xml.rels"); + const srcRelsFile = srcZip.file("ppt/_rels/presentation.xml.rels"); + const outPresFile = outZip.file("ppt/presentation.xml"); + const srcPresFile = srcZip.file("ppt/presentation.xml"); + if (!outRelsFile || !srcRelsFile || !outPresFile || !srcPresFile) return; + + const [outRelsXml, srcRelsXml, outPresXml, srcPresXml] = await Promise.all([ + outRelsFile.async("string"), + srcRelsFile.async("string"), + outPresFile.async("string"), + srcPresFile.async("string"), + ]); + const outRels = parseRels(outRelsXml); + const srcRels = parseRels(srcRelsXml); + + // 1. Drop pptxgenjs's chrome rels — slideMaster / theme / notesMaster / + // handoutMaster — and remember their rIds so we can scrub them out of + // `` etc. in presentation.xml. 
+ const droppedRids = new Set(); + const keptRels: Array<[string, { type: string; target: string }]> = []; + for (const [id, rel] of outRels) { + if ( + rel.type === REL_TYPE_SLIDE_MASTER || + rel.type === REL_TYPE_THEME || + rel.type === REL_TYPE_NOTES_MASTER || + rel.type === REL_TYPE_HANDOUT_MASTER + ) { + droppedRids.add(id); + } else { + keptRels.push([id, rel]); + } + } + + // 2. Allocate fresh rIds for the source's chrome rels in the output's + // rId namespace, and remember the mapping so we can rewrite + // presentation.xml's entries. + let nextRid = + [...outRels.keys(), ...srcRels.keys()].reduce((max, id) => { + const n = /^rId(\d+)$/.exec(id); + return n ? Math.max(max, Number(n[1])) : max; + }, 0) + 1; + const srcToOutRid = new Map(); + const newChromeRels: string[] = []; + for (const [srcId, rel] of srcRels) { + if ( + rel.type !== REL_TYPE_SLIDE_MASTER && + rel.type !== REL_TYPE_THEME && + rel.type !== REL_TYPE_NOTES_MASTER && + rel.type !== REL_TYPE_HANDOUT_MASTER + ) { + continue; + } + const outId = `rId${nextRid++}`; + srcToOutRid.set(srcId, outId); + newChromeRels.push(buildRelXml(outId, rel.type, rel.target)); + } + + // 3. Rebuild presentation.xml.rels: kept slide / props rels + new chrome rels. + const rebuiltRels = + `` + + `` + + keptRels.map(([id, r]) => buildRelXml(id, r.type, r.target)).join("") + + newChromeRels.join("") + + ``; + outZip.file("ppt/_rels/presentation.xml.rels", rebuiltRels); + + // 4. Splice and from source into + // output's presentation.xml, with r:id values remapped to the new + // rIds allocated above. Anything else in the output (sldIdLst, sldSz, + // defaultTextStyle, etc.) is left alone — those describe the slide + // set pptxgenjs just wrote. 
+ let pres = outPresXml; + pres = replaceListElement( + pres, + "p:sldMasterIdLst", + extractListElement(srcPresXml, "p:sldMasterIdLst"), + srcToOutRid + ); + pres = replaceListElement( + pres, + "p:notesMasterIdLst", + extractListElement(srcPresXml, "p:notesMasterIdLst"), + srcToOutRid + ); + // handoutMasterIdLst is rare but cheap to preserve. + pres = replaceListElement( + pres, + "p:handoutMasterIdLst", + extractListElement(srcPresXml, "p:handoutMasterIdLst"), + srcToOutRid + ); + + // 5. Carry over `` verbatim so PowerPoint knows which + // embedded fonts to install on open. Font payloads under ppt/fonts/ + // were already copied as part of the chrome sweep. + const embeddedFonts = extractListElement(srcPresXml, "p:embeddedFontLst"); + if (embeddedFonts) { + pres = replaceListElement(pres, "p:embeddedFontLst", embeddedFonts, srcToOutRid); + } + outZip.file("ppt/presentation.xml", pres); +} + +function extractListElement(xml: string, tag: string): string | null { + const open = xml.indexOf(`<${tag}`); + if (open < 0) return null; + // Self-closing form (``) is legal but uninteresting. + const selfCloseMatch = new RegExp(`<${tag}\\b[^>]*/\\s*>`).exec(xml); + if (selfCloseMatch && selfCloseMatch.index === open) return null; + const close = xml.indexOf(``, open); + if (close < 0) return null; + return xml.slice(open, close + tag.length + 3); +} + +function replaceListElement( + xml: string, + tag: string, + newFragment: string | null, + ridRemap: Map +): string { + if (!newFragment) return xml; + const remapped = newFragment.replace( + /\br:id="(rId\d+)"/g, + (_m, srcRid: string) => { + const out = ridRemap.get(srcRid); + return out ? `r:id="${out}"` : `r:id="${srcRid}"`; + } + ); + const open = xml.indexOf(`<${tag}`); + if (open < 0) { + // Tag not in output → insert just before if possible, + // otherwise just before . 
+ const sldIdLst = xml.indexOf("= 0) { + return xml.slice(0, sldIdLst) + remapped + xml.slice(sldIdLst); + } + const closePres = xml.lastIndexOf(""); + return closePres >= 0 + ? xml.slice(0, closePres) + remapped + xml.slice(closePres) + : xml; + } + const selfCloseMatch = new RegExp(`<${tag}\\b[^>]*/\\s*>`).exec(xml); + if (selfCloseMatch && selfCloseMatch.index === open) { + return ( + xml.slice(0, selfCloseMatch.index) + + remapped + + xml.slice(selfCloseMatch.index + selfCloseMatch[0].length) + ); + } + const close = xml.indexOf(``, open); + if (close < 0) return xml; + return xml.slice(0, open) + remapped + xml.slice(close + tag.length + 3); +} + +async function rewriteSlideLayoutRefs( + outZip: JSZip, + srcZip: JSZip, + deck: Deck, + sourceSlidePaths: string[] +): Promise { + // Pre-compute a default fallback layout target for new slides that have + // no source counterpart: the first slideLayout the source ships. + let fallbackLayout: string | undefined; + srcZip.forEach((relPath) => { + if (fallbackLayout) return; + if ( + relPath.startsWith("ppt/slideLayouts/") && + relPath.endsWith(".xml") && + !relPath.includes("/_rels/") + ) { + fallbackLayout = relPath; + } + }); + + for (let i = 0; i < deck.slides.length; i++) { + const slide = deck.slides[i]; + const slideRelsPath = `ppt/slides/_rels/slide${i + 1}.xml.rels`; + const outSlideRelsXml = await outZip.file(slideRelsPath)?.async("string"); + if (!outSlideRelsXml) continue; + + const sourceSlidePath = + ((slide as unknown as Record)[SOURCE_SLIDE_PATH] as + | string + | undefined) ?? 
sourceSlidePaths[i]; + let layoutTargetFull: string | undefined; + if (sourceSlidePath) { + const srcSlideRelsPath = relsPathFor(sourceSlidePath); + const srcSlideRelsXml = await srcZip + .file(srcSlideRelsPath) + ?.async("string"); + if (srcSlideRelsXml) { + const srcRels = parseRels(srcSlideRelsXml); + for (const rel of srcRels.values()) { + if ( + rel.type === + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout" + ) { + layoutTargetFull = normalisePath( + rel.target, + dirOf(sourceSlidePath) + ); + break; + } + } + } + } + if (!layoutTargetFull && fallbackLayout) { + layoutTargetFull = fallbackLayout; + } + if (!layoutTargetFull) continue; + + // Rewrite the layout target in the output's slide rels. The slide XML + // itself lives at ppt/slides/slideN.xml, so targets there are + // relative to ppt/slides/. + const newTarget = relativeTarget("ppt/slides", layoutTargetFull); + const updated = outSlideRelsXml.replace( + /()/, + `$1${newTarget}$3` + ); + if (updated !== outSlideRelsXml) { + outZip.file(slideRelsPath, updated); + } + } +} + +async function aspectRatiosMatch( + outZip: JSZip, + srcZip: JSZip +): Promise { + const [outPres, srcPres] = await Promise.all([ + outZip.file("ppt/presentation.xml")?.async("string"), + srcZip.file("ppt/presentation.xml")?.async("string"), + ]); + if (!outPres || !srcPres) return false; + const outSz = parseSldSz(outPres); + const srcSz = parseSldSz(srcPres); + if (!outSz || !srcSz) return false; + const outRatio = outSz.cx / outSz.cy; + const srcRatio = srcSz.cx / srcSz.cy; + // ~1% tolerance covers floating-point drift; PPTX aspect ratios are + // exact integer EMU. + return Math.abs(outRatio - srcRatio) / outRatio < 0.01; +} + +function parseSldSz(xml: string): { cx: number; cy: number } | null { + const m = /` from the deck which, + // combined with a slide-level catch upstream, could wipe everything + // on the same slide. 
const decoded = await decodeMetafileToDataUrl(file, ext); if (!decoded) { ctx.diagnostics.warnings.push( - `Skipped ${ext.toUpperCase()} image at ${fullPath} — vector metafile decode unavailable in this environment.` + `Preserving ${ext.toUpperCase()} image at ${fullPath} as UnknownElement — vector metafile decode unavailable in this environment.` ); - return null; + return toUnknown(pic, "p:pic", ctx, outer); } // decoded is `data:image/png;base64,…` — strip prefix to match the // common path below. From b5f9c790388d99616664daf31cedde27826ee91f Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Wed, 13 May 2026 23:03:34 +0530 Subject: [PATCH 2/2] test(pptx): skipIf fixtures missing so CI passes without .context attachments --- .../__tests__/chrome-preservation.test.ts | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts b/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts index 2ccea47..094ae1d 100644 --- a/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts +++ b/packages/slidewise/src/lib/pptx/__tests__/chrome-preservation.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { readFile } from "node:fs/promises"; +import { readFile, access } from "node:fs/promises"; import { fileURLToPath } from "node:url"; import path from "node:path"; import JSZip from "jszip"; @@ -7,16 +7,34 @@ import { parsePptx, serializeDeck } from "../index"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); +// Real client decks (Dickinson, eon-deck) live in the gitignored +// `.context/attachments/` Conductor workspace dir — they're branded +// samples we can't commit publicly. 
Tests `it.skipIf` themselves when +// the fixture isn't on disk so CI stays green for outside contributors +// while the regression guards run locally / on workspaces that have +// the fixtures available. const attachmentsDir = path.resolve( __dirname, "../../../../../../.context/attachments" ); +async function fixtureExists(name: string): Promise { + try { + await access(path.join(attachmentsDir, name)); + return true; + } catch { + return false; + } +} + async function loadFixture(name: string): Promise { const buf = await readFile(path.join(attachmentsDir, name)); return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer; } +const hasEon = await fixtureExists("eon-deck.pptx"); +const hasDickinson = await fixtureExists("Dickinson_Sample_Slides.pptx"); + async function listZipPaths(buf: ArrayBuffer | Blob): Promise> { const ab = buf instanceof Blob ? await buf.arrayBuffer() : buf; const zip = await JSZip.loadAsync(ab); @@ -56,7 +74,7 @@ async function countSlidesWithSpTreeChildren( } describe("deck chrome preservation", () => { - it("preserves slide masters / layouts / theme / fonts on a 16:9 source (eon-deck)", async () => { + it.skipIf(!hasEon)("preserves slide masters / layouts / theme / fonts on a 16:9 source (eon-deck)", async () => { const source = await loadFixture("eon-deck.pptx"); const deck = await parsePptx(source); @@ -93,7 +111,7 @@ describe("deck chrome preservation", () => { expect(outPaths.has("ppt/theme/theme1.xml")).toBe(true); }); - it("keeps slide content intact when the source has EMF pictures (Dickinson)", async () => { + it.skipIf(!hasDickinson)("keeps slide content intact when the source has EMF pictures (Dickinson)", async () => { const source = await loadFixture("Dickinson_Sample_Slides.pptx"); const deck = await parsePptx(source);