superdoc-dev · financialvice · May 12, 2026 · caio-pizzol · May 12, 2026 · caio-pizzol
@@ -21,6 +21,66 @@ const FONT_CONTENT_TYPES = {
   otf: 'application/vnd.ms-opentype',
 };
 
+/**
+ * Relationship `Type` value that getMainDocumentPath looks for among the entries of
+ * `_rels/.rels` in order to locate the package's main document part.
+ */
+const OFFICE_DOCUMENT_RELATIONSHIP_TYPE =
+  'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
+
+/**
+ * Trims a package part path: removes every leading "/" and then one leading "./".
+ *
+ * @param {unknown} path - Candidate part path, e.g. a relationship `Target` attribute.
+ * @returns {string|null} The trimmed path, or null when `path` is not a string.
+ */
+const normalizePackagePath = (path) => {
+  if (typeof path === 'string') {
+    const withoutLeadingSlashes = path.replace(/^\/+/, '');
+    return withoutLeadingSlashes.replace(/^\.\//, '');
+  }
+
+  return null;
+};
+
+/**
+ * Parses a relationships XML string and returns the child nodes of its top-level
+ * `Relationships` element.
+ *
+ * Best-effort by design: falsy input, XML that fails to parse, and documents without a
+ * `Relationships` element (or without children under it) all yield an empty array.
+ *
+ * @param {string} relationshipsXml - Contents of a `.rels` part.
+ * @returns {Array<object>} Child nodes of `Relationships`, or an empty array.
+ */
+const getRelationshipElements = (relationshipsXml) => {
+  if (!relationshipsXml) return [];
+
+  try {
+    const { elements: topLevelNodes } = xmljs.xml2js(relationshipsXml, { compact: false });
+    const relationshipsRoot = topLevelNodes?.find(({ name }) => name === 'Relationships');
+    return relationshipsRoot?.elements ?? [];
+  } catch {
+    // Unreadable relationships are treated the same as having none.
+    return [];
+  }
+};
+
+/**
+ * Resolves the path of the package's main document part from `_rels/.rels`.
+ *
+ * The first relationship whose `Type` equals OFFICE_DOCUMENT_RELATIONSHIP_TYPE and whose
+ * `TargetMode` is not `External` wins; its `Target` is passed through normalizePackagePath.
+ *
+ * @param {object} zip - Archive object exposing `file(name)`; entries expose `async(type)`.
+ * @returns {Promise<string|null>} The normalized target path, or null when `_rels/.rels` is
+ *   missing or no matching relationship with a string `Target` is found.
+ */
+const getMainDocumentPath = async (zip) => {
+  const relsEntry = zip.file('_rels/.rels');
+  if (!relsEntry) return null;
+
+  const relsBytes = await relsEntry.async('uint8array');
+  const relationships = getRelationshipElements(ensureXmlString(relsBytes));
+
+  const isInternalOfficeDocument = (rel) => {
+    const { Type, TargetMode } = rel?.attributes ?? {};
+    return Type === OFFICE_DOCUMENT_RELATIONSHIP_TYPE && TargetMode !== 'External';
+  };
+  const mainDocumentRel = relationships.find(isInternalOfficeDocument);
+
+  return normalizePackagePath(mainDocumentRel?.attributes?.Target);
+};
+
+/**
+ * Tells whether the main document sits at the archive root, i.e. its path is non-empty and
+ * contains no "/" separator.
+ *
+ * @param {string|null|undefined} mainDocumentPath - Normalized main document path.
+ * @returns {boolean} True only for a non-empty path without any "/".
+ */
+const isRootLevelMainDocumentPackage = (mainDocumentPath) => {
+  if (!mainDocumentPath) return false;
+  return !mainDocumentPath.includes('/');
+};
+
+/**
+ * Maps a zip entry name from a package whose main document sits at the archive root onto the
+ * canonical `word/` layout.
+ *
+ * The name is returned unchanged when the main document is not at the root, and for
+ * `[Content_Types].xml`, `_rels/.rels`, anything already under `word/`, and any entry that
+ * matches none of the rules in the body.
+ *
+ * @param {string} name - Zip entry name as stored in the archive.
+ * @param {string|null} mainDocumentPath - Normalized main document path, or null when unknown.
+ * @returns {string} The entry name to use for the extracted file.
+ */
+const normalizeRootLevelWordPartPath = (name, mainDocumentPath) => {
+  if (!isRootLevelMainDocumentPackage(mainDocumentPath)) return name;
+  if (name === '[Content_Types].xml' || name === '_rels/.rels') return name;
+  if (name.startsWith('word/')) return name;
+
+  // The main document and its relationships part are canonicalized as a pair: the .rels part is
+  // always emitted as word/_rels/document.xml.rels, so the part it describes must become
+  // word/document.xml even when the package names it differently (e.g. "main.xml").
+  if (name === mainDocumentPath) return 'word/document.xml';
+  if (name === `_rels/${mainDocumentPath}.rels`) return 'word/_rels/document.xml.rels';
+
+  // Remaining root-level content that moves under word/ with its name intact.
+  const isOtherPartRels = name.startsWith('_rels/') && name.endsWith('.rels');
+  const isRootXmlPart = !name.includes('/') && name.endsWith('.xml');
+  const isRootImage = name.startsWith('images/');
+
+  return isOtherPartRels || isRootXmlPart || isRootImage ? `word/${name}` : name;
+};
+
 /**
  * Class to handle unzipping and zipping of docx files
  */
@@ -111,10 +171,11 @@ class DocxZipper {
     // If caller supplied a password but the file isn't encrypted, ignore it.
 
     const extractedFiles = await this.unzip(fileData);
+    const mainDocumentPath = await getMainDocumentPath(extractedFiles);
     const files = Object.entries(extractedFiles.files);
 
     for (const [, zipEntry] of files) {
-      const name = zipEntry.name;
+      const name = normalizeRootLevelWordPartPath(zipEntry.name, mainDocumentPath);
 
       if (isXmlLike(name)) {
         // Read raw bytes and decode (handles UTF-8 & UTF-16)
@@ -123,7 +184,9 @@ class DocxZipper {
         this.files.push({ name, content });
       } else if (
         (name.startsWith('word/media') && name !== 'word/media/') ||
+        (name.startsWith('word/images') && name !== 'word/images/') ||
         (zipEntry.name.startsWith('media') && zipEntry.name !== 'media/') ||
+        (zipEntry.name.startsWith('images') && zipEntry.name !== 'images/') ||
         (name.startsWith('media') && name !== 'media/') ||
         (name.startsWith('word/embeddings') && name !== 'word/embeddings/')
       ) {

@@ -48,6 +48,63 @@ describe('DocxZipper - file extraction', () => {
     const documentXml = unzippedXml.find((file) => file.name === 'word/document.xml');
     expect(documentXml).toBeTruthy();
   });
+
+  it('normalizes root-level WordprocessingML parts to canonical word paths', async () => {
+    // Fixture: a package whose main document, its relationships and sibling parts are stored at
+    // the archive root rather than under word/.
+    const rootLevelXmlParts = {
+      '[Content_Types].xml': `<?xml version="1.0" encoding="UTF-8"?>
+      <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
+        <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
+        <Default Extension="xml" ContentType="application/xml"/>
+        <Override PartName="/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
+        <Override PartName="/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/>
+        <Override PartName="/header1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
+      </Types>`,
+      '_rels/.rels': `<?xml version="1.0" encoding="UTF-8"?>
+        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+          <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="document.xml"/>
+        </Relationships>`,
+      '_rels/document.xml.rels': `<?xml version="1.0" encoding="UTF-8"?>
+        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+          <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings" Target="settings.xml"/>
+          <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header1.xml"/>
+          <Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="images/1.png"/>
+        </Relationships>`,
+      'document.xml': `<?xml version="1.0" encoding="UTF-8"?>
+        <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
+          <w:body><w:p><w:r><w:t>Hello from root</w:t></w:r></w:p></w:body>
+        </w:document>`,
+      'settings.xml':
+        '<?xml version="1.0" encoding="UTF-8"?><w:settings xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"/>',
+      'header1.xml':
+        '<?xml version="1.0" encoding="UTF-8"?><w:hdr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"/>',
+    };
+
+    const pkg = new JSZip();
+    for (const [partName, xml] of Object.entries(rootLevelXmlParts)) {
+      pkg.file(partName, xml);
+    }
+    pkg.file('images/1.png', Buffer.from([0x89, 0x50, 0x4e, 0x47]));
+
+    const archive = await pkg.generateAsync({ type: 'nodebuffer' });
+    const extracted = await zipper.getDocxData(archive, true);
+    const extractedNames = extracted.map(({ name }) => name);
+
+    // Every root-level part must surface under its canonical word/ name...
+    const canonicalNames = [
+      'word/document.xml',
+      'word/settings.xml',
+      'word/header1.xml',
+      'word/_rels/document.xml.rels',
+    ];
+    for (const canonicalName of canonicalNames) {
+      expect(extractedNames).toContain(canonicalName);
+    }
+
+    // ...and must no longer be listed under its original root-level name.
+    for (const rootLevelName of ['document.xml', '_rels/document.xml.rels']) {
+      expect(extractedNames).not.toContain(rootLevelName);
+    }
+
+    expect(zipper.mediaFiles['word/images/1.png']).toBeTruthy();
+  });
 });
 
 // Helper to build a UTF-16LE Buffer with BOM