From 0546d5a121d18aea0e23bc235442a2c83ef9d4a7 Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Fri, 12 Jun 2026 11:27:27 +0530 Subject: [PATCH] =?UTF-8?q?feat(pptx):=20applyEdits=20=E2=80=94=20lossless?= =?UTF-8?q?=20surgical-edit=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add applyEdits(source, plan, options?): a patch on the original .pptx bytes rather than a full re-serialize. Everything not named by an edit comes out byte-identical to the source (masters, layouts, theme, fonts, tags, notes, embeddings, untouched elements); the result opens in PowerPoint with no repair. - New elementLocationRegistry in the parser (getElementLocation) maps every element id -> its verbatim source XML block, with no placeholder filtering. - Ops: setText/clearText, setChartData (in-place chart fill keeping type/colour + a regenerated, consistent embedded xlsx so Edit-Data works), setTableData, setImage, removeElement, addChart, addDiagram, per-slide background, title. - Slide subset/reorder/repeat from the template; repeats deep-clone from a pristine view so edits never bleed across copies. - Removed slides + their exclusive parts reclaimed by a reachability sweep, then reconcileDanglingRels + content-type pruning. - Unresolved ids / unsupported layout-instantiation surface via onWarning. Tests on a synthetic 4-slide template incl. the acceptance test (untouched slide byte-identical, chart workbook updated, zero dangling rels). 
--- .../applyedits-lossless-surgical-edits.md | 26 + README.md | 51 + packages/slidewise/src/index.ts | 9 +- .../lib/pptx/__tests__/apply-edits.test.ts | 577 ++++++++ packages/slidewise/src/lib/pptx/applyEdits.ts | 1298 +++++++++++++++++ packages/slidewise/src/lib/pptx/index.ts | 10 + packages/slidewise/src/lib/pptx/pptxToDeck.ts | 44 + 7 files changed, 2014 insertions(+), 1 deletion(-) create mode 100644 .changeset/applyedits-lossless-surgical-edits.md create mode 100644 packages/slidewise/src/lib/pptx/__tests__/apply-edits.test.ts create mode 100644 packages/slidewise/src/lib/pptx/applyEdits.ts diff --git a/.changeset/applyedits-lossless-surgical-edits.md b/.changeset/applyedits-lossless-surgical-edits.md new file mode 100644 index 0000000..f10033f --- /dev/null +++ b/.changeset/applyedits-lossless-surgical-edits.md @@ -0,0 +1,26 @@ +--- +"@textcortex/slidewise": minor +--- + +feat(pptx): `applyEdits` — lossless surgical-edit API + +Add `applyEdits(source, plan, options?)`: a patch on the original `.pptx` bytes +rather than a full re-serialize. The create flow can now emit an `EditPlan` +(subset/reorder/repeat of template slides, each with edits) and get back a valid +package where everything not named by an edit is byte-identical to the source — +masters, layouts, theme, embedded fonts, `ppt/tags/*`, notes, embeddings, and +any untouched element. This removes the lossy round-trip that produced the +`custGeom`/SVG-fallback/dangling-rel fidelity bugs and lets hosts drop their +defensive cleanup. `serializeDeck` stays for the live editor / from-scratch decks. + +Edits address elements by the same stable ids `parsePptx` returns; slides by +1-based template index. 
Supported ops: `setText`/`clearText` (preserve the +template box + run styling, or rebuild from supplied runs), `setChartData` +(repopulate a native chart in place — type/colours kept, caches **and** the +embedded `xlsx` workbook updated so Edit-Data still works), `setTableData`, +`setImage`, `removeElement`, `addChart`, `addDiagram`, plus per-slide +`background` and deck `title`. Removed slides and any parts that become +exclusive to them are reclaimed by a package-wide reachability sweep, then +dangling relationships and content-types are reconciled. Unresolved element ids +and unsupported layout-instantiation are surfaced via `onWarning` instead of +throwing. diff --git a/README.md b/README.md index 8233bdd..132c46b 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,57 @@ editor only sees current-shape decks. It throws if the input was written by a newer Slidewise than the host has installed — pin the version range you can support. +### Lossless surgical edits with `applyEdits` + +When you start from a branded template and only need to change a few things +(swap some text, fill a chart, drop a sample element), a full +`serializeDeck` round-trip is overkill — and re-rendering unedited elements is +where fidelity bugs come from. `applyEdits(source, plan)` instead **patches the +original bytes**: everything not named by an edit comes out byte-identical to +the source (masters, layouts, theme, embedded fonts, `ppt/tags/*`, notes, +embeddings, and any untouched element), and the result opens in PowerPoint with +no repair. + +```ts +import { parsePptx, applyEdits, type EditPlan } from "@textcortex/slidewise"; + +const deck = await parsePptx(source); // element ids: deck.slides[i].elements[j].id +const plan: EditPlan = { + title: "Q3 Results", + // Output order = this list. Each slide is addressed by its 1-based index in + // the source template; a source slide may repeat for controlled reuse. 
+ slides: [ + { + source: { slideIndex: 1 }, + edits: [{ op: "setText", elementId: titleId, text: "Q3 Results" }], + }, + { + source: { slideIndex: 3 }, + edits: [ + // Repopulate a native chart in place — type/colours and the embedded + // workbook are preserved, so PowerPoint's Edit-Data still works. + { op: "setChartData", elementId: chartId, categories: ["Jan", "Feb", "Mar"], series: [{ name: "Revenue", values: [10, 20, 30] }] }, + { op: "removeElement", elementId: sampleChartId }, + ], + }, + { source: { slideIndex: 4 }, edits: [] }, // kept byte-identical + ], +}; + +const out: Uint8Array = await applyEdits(source, plan, { + onWarning: (w) => notifyHost(w.message), // unresolved id / unsupported op +}); +``` + +Ops: `setText` / `clearText`, `setChartData`, `setTableData`, `setImage`, +`removeElement`, `addChart`, `addDiagram`, plus per-slide `background` and the +deck `title`. Elements are addressed by the same stable ids `parsePptx` returns, +so call `applyEdits` in the same process as the `parsePptx` that produced the +plan. Removed slides and any parts exclusive to them are reclaimed +automatically. `serializeDeck` remains the path for the live editor and +from-scratch decks; `applyEdits` is the lossless path for template-derived +output. + ### Generating slides from the template's layouts `parsePptx` exposes the source template's master layouts on `deck.layouts`. 
diff --git a/packages/slidewise/src/index.ts b/packages/slidewise/src/index.ts index 7aeff4c..97c9954 100644 --- a/packages/slidewise/src/index.ts +++ b/packages/slidewise/src/index.ts @@ -88,10 +88,17 @@ export { type SlideRailItemContextValue, } from "./compound"; -export { parsePptx, isPptxTemplate, serializeDeck } from "./lib/pptx"; +export { parsePptx, isPptxTemplate, serializeDeck, applyEdits } from "./lib/pptx"; export type { SerializeOptions, SerializeWarning, + EditPlan, + PlannedSlide, + Edit, + Run, + Series, + Rect, + ApplyEditsOptions, } from "./lib/pptx"; export type { ParseDiagnostics, ParseResult } from "./lib/pptx/types"; diff --git a/packages/slidewise/src/lib/pptx/__tests__/apply-edits.test.ts b/packages/slidewise/src/lib/pptx/__tests__/apply-edits.test.ts new file mode 100644 index 0000000..8d6657f --- /dev/null +++ b/packages/slidewise/src/lib/pptx/__tests__/apply-edits.test.ts @@ -0,0 +1,577 @@ +import { describe, it, expect } from "vitest"; +import JSZip from "jszip"; +import { parsePptx } from "../index"; +import { applyEdits, type EditPlan, type SerializeWarning } from "../index"; +import type { Deck } from "../../types"; + +/** + * `applyEdits` — lossless surgical-edit API tests. Built on a synthetic 4-slide + * template (no branded fixture needed) that carries the element kinds the API + * patches: text placeholders, a native chart with an embedded workbook, a + * "sample" chart to strip, a table, and an untouched decorative slide. + */ + +const NS_P = "http://schemas.openxmlformats.org/presentationml/2006/main"; +const NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"; +const NS_R = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; + +// Smallest valid PNG (1×1 transparent). 
+const ONE_PX_PNG = Uint8Array.from([ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, + 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x06, + 0x00, 0x00, 0x00, 0x1f, 0x15, 0xc4, 0x89, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x44, + 0x41, 0x54, 0x78, 0x9c, 0x63, 0x00, 0x01, 0x00, 0x00, 0x05, 0x00, 0x01, 0x0d, + 0x0a, 0x2d, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, + 0x60, 0x82, +]); + +function sp(id: number, name: string, x: number, y: number, text: string): string { + return ( + `` + + `` + + `` + + `${text}` + + `` + ); +} + +function chartFrame(id: number, name: string, rid: string): string { + return ( + `` + + `` + + `` + + `` + + `` + ); +} + +/** A bar chart part with one series + an embedded-workbook reference. */ +function chartXml(externalDataRid: string | null): string { + const ext = externalDataRid + ? `` + : ""; + return ( + `` + + `` + + `` + + `` + + `` + + `Sheet1!$B$1Old Series` + + `` + + `Sheet1!$A$2:$A$3OldAOldB` + + `Sheet1!$B$2:$B$3General12` + + `` + + `` + + `` + + `` + + `${ext}` + ); +} + +function tableFrame(id: number, name: string): string { + return ( + `` + + `` + + `` + + `` + + `H1H2` + + `ab` + + `` + ); +} + +function picFrame(id: number, name: string, rid: string): string { + return ( + `` + + `` + + `` + + `` + ); +} + +function slide(body: string, withR = false): string { + const rns = withR ? ` xmlns:r="${NS_R}"` : ""; + return ( + `` + + `` + + `` + + body + + `` + ); +} + +function relsXml(entries: string): string { + return ( + `` + + `${entries}` + ); +} + +/** Build the 4-slide synthetic template. 
*/ +async function buildTemplate(): Promise { + const zip = new JSZip(); + + zip.file( + "[Content_Types].xml", + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + ); + + zip.file( + "_rels/.rels", + relsXml( + `` + + `` + ) + ); + + zip.file( + "docProps/core.xml", + `` + + `Template` + ); + + zip.file( + "ppt/presentation.xml", + `` + + `` + + `` + + `` + + `` + + `` + ); + zip.file( + "ppt/_rels/presentation.xml.rels", + relsXml( + `` + + `` + + `` + + `` + ) + ); + + // Slide 1: a title placeholder. + zip.file("ppt/slides/slide1.xml", slide(sp(2, "Title", 1000000, 500000, "Old Title"))); + zip.file("ppt/slides/_rels/slide1.xml.rels", relsXml("")); + + // Slide 2: body text + native chart with embedded workbook. + zip.file( + "ppt/slides/slide2.xml", + slide(sp(2, "Body", 1000000, 500000, "Old Body") + chartFrame(3, "Chart", "rId1"), true) + ); + zip.file( + "ppt/slides/_rels/slide2.xml.rels", + relsXml(``) + ); + zip.file("ppt/charts/chart1.xml", chartXml("rId1")); + zip.file( + "ppt/charts/_rels/chart1.xml.rels", + relsXml(``) + ); + zip.file("ppt/embeddings/wb1.xlsx", Uint8Array.from([0x50, 0x4b, 0x03, 0x04])); // placeholder + + // Slide 3: a sample chart to remove + a caption. + zip.file( + "ppt/slides/slide3.xml", + slide(chartFrame(2, "SampleChart", "rId1") + sp(3, "Caption", 1000000, 5000000, "Caption"), true) + ); + zip.file( + "ppt/slides/_rels/slide3.xml.rels", + relsXml(``) + ); + zip.file("ppt/charts/chart2.xml", chartXml(null)); + zip.file("ppt/charts/_rels/chart2.xml.rels", relsXml("")); + + // Slide 4: untouched decorative slide with an image. 
+ zip.file("ppt/slides/slide4.xml", slide(picFrame(2, "Pic", "rId1"), true)); + zip.file( + "ppt/slides/_rels/slide4.xml.rels", + relsXml(``) + ); + zip.file("ppt/media/pic.png", ONE_PX_PNG); + + return zip.generateAsync({ type: "uint8array" }); +} + +async function loadZip(bytes: Uint8Array): Promise { + return JSZip.loadAsync(bytes); +} + +/** Assert every internal relationship target resolves to a present part. */ +async function assertNoDanglingRels(zip: JSZip): Promise { + const present = new Set(); + zip.forEach((p, e) => { + if (!e.dir) present.add(p); + }); + const relsPaths: string[] = []; + zip.forEach((p, e) => { + if (!e.dir && p.endsWith(".rels")) relsPaths.push(p); + }); + for (const relsPath of relsPaths) { + const xml = await zip.file(relsPath)!.async("string"); + const ownerDir = relsPath.includes("/_rels/") + ? relsPath.slice(0, relsPath.indexOf("/_rels/")) + : ""; + for (const m of xml.matchAll(/]*\/?>/g)) { + const tag = m[0]; + const mode = /\bTargetMode="([^"]+)"/.exec(tag)?.[1]; + const target = /\bTarget="([^"]+)"/.exec(tag)?.[1]; + if (!target || mode === "External" || /^https?:/.test(target)) continue; + const full = normalise(target, ownerDir); + expect(present.has(full), `${relsPath} → ${target} resolves to ${full}`).toBe(true); + } + } +} + +function normalise(target: string, baseDir: string): string { + if (target.startsWith("/")) return target.slice(1); + const segs = (baseDir ? 
baseDir.split("/") : []).concat(target.split("/")); + const out: string[] = []; + for (const s of segs) { + if (s === "" || s === ".") continue; + if (s === "..") out.pop(); + else out.push(s); + } + return out.join("/"); +} + +function idOf(deck: Deck, slideIdx: number, pred: (e: Deck["slides"][number]["elements"][number]) => boolean): string { + const el = deck.slides[slideIdx].elements.find(pred); + if (!el) throw new Error("element not found"); + return el.id; +} + +describe("applyEdits", () => { + it("acceptance: edits text + fills a chart + removes a sample chart, leaving an untouched slide byte-identical with zero dangling rels", async () => { + const source = await buildTemplate(); + const deck = await parsePptx(source); + + const titleId = idOf(deck, 0, (e) => e.type === "text"); + const bodyId = idOf(deck, 1, (e) => e.type === "text"); + const chartId = idOf(deck, 1, (e) => e.type === "chart"); + const captionId = idOf(deck, 2, (e) => e.type === "text"); + const sampleChartId = idOf(deck, 2, (e) => e.type === "chart"); + + const warnings: SerializeWarning[] = []; + const plan: EditPlan = { + title: "Q3 Results", + slides: [ + { source: { slideIndex: 1 }, edits: [{ op: "setText", elementId: titleId, text: "New Title" }] }, + { + source: { slideIndex: 2 }, + edits: [ + { op: "setText", elementId: bodyId, text: "New Body" }, + { + op: "setChartData", + elementId: chartId, + categories: ["Jan", "Feb", "Mar"], + series: [{ name: "Revenue", values: [10, 20, 30] }], + }, + ], + }, + { + source: { slideIndex: 3 }, + edits: [ + { op: "setText", elementId: captionId, text: "New Caption" }, + { op: "removeElement", elementId: sampleChartId }, + ], + }, + { source: { slideIndex: 4 }, edits: [] }, + ], + }; + + const out = await applyEdits(source, plan, { onWarning: (w) => warnings.push(w) }); + expect(warnings).toEqual([]); + + const srcZip = await loadZip(source); + const outZip = await loadZip(out); + + // (d) Slide 4 + its media stay byte-identical to source. 
+ const srcSlide4 = await srcZip.file("ppt/slides/slide4.xml")!.async("uint8array"); + const outSlide4 = await outZip.file("ppt/slides/slide4.xml")!.async("uint8array"); + expect(outSlide4).toEqual(srcSlide4); + const srcPic = await srcZip.file("ppt/media/pic.png")!.async("uint8array"); + const outPic = await outZip.file("ppt/media/pic.png")!.async("uint8array"); + expect(outPic).toEqual(srcPic); + + // (a) Text edits applied. + expect(await outZip.file("ppt/slides/slide1.xml")!.async("string")).toContain("New Title"); + expect(await outZip.file("ppt/slides/slide2.xml")!.async("string")).toContain("New Body"); + expect(await outZip.file("ppt/slides/slide3.xml")!.async("string")).toContain("New Caption"); + + // (b) Chart caches reflect new data; type + colour preserved. + const chart = await outZip.file("ppt/charts/chart1.xml")!.async("string"); + expect(chart).toContain(""); + expect(chart).toContain("EA1B0A"); // template series colour preserved + expect(chart).toContain("Revenue"); + expect(chart).toContain("Jan"); + expect(chart).toContain("Mar"); + expect(chart).not.toContain("OldA"); + + // (b) Embedded workbook reflects the new data and is a valid xlsx package. + const wbBytes = await outZip.file("ppt/embeddings/wb1.xlsx")!.async("uint8array"); + const wb = await loadZip(wbBytes); + const sheet = await wb.file("xl/worksheets/sheet1.xml")!.async("string"); + expect(sheet).toContain("Jan"); + expect(sheet).toContain("Revenue"); + expect(sheet).toContain("30"); + await assertNoDanglingRels(wb); + + // (c) Sample chart removed; its part + workbook reclaimed. + const slide3 = await outZip.file("ppt/slides/slide3.xml")!.async("string"); + expect(slide3).not.toContain("SampleChart"); + expect(outZip.file("ppt/charts/chart2.xml")).toBeNull(); + + // Whole package: structurally intact (root rels + content types present), + // zero dangling rels + title written. 
+ expect(outZip.file("_rels/.rels")).not.toBeNull(); + expect(outZip.file("[Content_Types].xml")).not.toBeNull(); + await assertNoDanglingRels(outZip); + expect(await outZip.file("docProps/core.xml")!.async("string")).toContain("Q3 Results"); + }); + + it("selects a subset of slides", async () => { + const source = await buildTemplate(); + const plan: EditPlan = { slides: [{ source: { slideIndex: 4 }, edits: [] }] }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + + // Only the kept slide survives; the others (and their deps) are gone. + expect(zip.file("ppt/slides/slide4.xml")).not.toBeNull(); + expect(zip.file("ppt/slides/slide1.xml")).toBeNull(); + expect(zip.file("ppt/slides/slide2.xml")).toBeNull(); + expect(zip.file("ppt/charts/chart1.xml")).toBeNull(); + expect(zip.file("ppt/embeddings/wb1.xlsx")).toBeNull(); + + const pres = await zip.file("ppt/presentation.xml")!.async("string"); + expect((pres.match(/ { + const source = await buildTemplate(); + const plan: EditPlan = { + slides: [ + { source: { slideIndex: 4 }, edits: [] }, + { source: { slideIndex: 1 }, edits: [] }, + ], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + + const pres = await zip.file("ppt/presentation.xml")!.async("string"); + const presRels = await zip.file("ppt/_rels/presentation.xml.rels")!.async("string"); + const order = [...pres.matchAll(/]*\br:id="([^"]+)"/g)].map((m) => m[1]); + const targetById = new Map( + [...presRels.matchAll(/]*\bId="([^"]+)"[^>]*\bTarget="([^"]+)"/g)].map( + (m) => [m[1], m[2]] + ) + ); + expect(order.map((id) => targetById.get(id))).toEqual([ + "slides/slide4.xml", + "slides/slide1.xml", + ]); + await assertNoDanglingRels(zip); + }); + + it("repeats a source slide into independent copies", async () => { + const source = await buildTemplate(); + const deck = await parsePptx(source); + const titleId = idOf(deck, 0, (e) => e.type === "text"); + const plan: EditPlan = { + slides: [ + { 
source: { slideIndex: 1 }, edits: [{ op: "setText", elementId: titleId, text: "First" }] }, + { source: { slideIndex: 1 }, edits: [{ op: "setText", elementId: titleId, text: "Second" }] }, + ], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + + // The original slide1 holds the first edit; a clone holds the second. + const slide1 = await zip.file("ppt/slides/slide1.xml")!.async("string"); + expect(slide1).toContain("First"); + expect(slide1).not.toContain("Second"); + const slidePaths: string[] = []; + zip.forEach((p) => { + if (/^ppt\/slides\/slide\w+\.xml$/.test(p)) slidePaths.push(p); + }); + expect(slidePaths.length).toBe(2); + const cloned = slidePaths.find((p) => p !== "ppt/slides/slide1.xml")!; + expect(await zip.file(cloned)!.async("string")).toContain("Second"); + await assertNoDanglingRels(zip); + }); + + it("setText with runs rebuilds the paragraph; clearText blanks it", async () => { + const source = await buildTemplate(); + const deck = await parsePptx(source); + const titleId = idOf(deck, 0, (e) => e.type === "text"); + const captionId = idOf(deck, 2, (e) => e.type === "text"); + const plan: EditPlan = { + slides: [ + { + source: { slideIndex: 1 }, + edits: [ + { + op: "setText", + elementId: titleId, + text: "x", + runs: [{ text: "Bold", fontWeight: 700, color: "#FF0000" }], + }, + ], + }, + { source: { slideIndex: 3 }, edits: [{ op: "clearText", elementId: captionId }] }, + ], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + const slide1 = await zip.file("ppt/slides/slide1.xml")!.async("string"); + expect(slide1).toContain("Bold"); + expect(slide1).toContain('b="1"'); + expect(slide1).toContain("FF0000"); + const slide3 = await zip.file("ppt/slides/slide3.xml")!.async("string"); + expect(slide3).not.toContain("Caption"); + }); + + it("fills a native table, keeping its structure", async () => { + const source = await buildTemplate(); + // Add a table to slide 4 for this test 
by re-parsing a template variant. + const deck = await parsePptx(source); + void deck; + // Build a fresh template whose slide 1 is a table. + const zipIn = await loadZip(source); + zipIn.file("ppt/slides/slide1.xml", slide(tableFrame(2, "Tbl"))); + const tableSource = await zipIn.generateAsync({ type: "uint8array" }); + const tdeck = await parsePptx(tableSource); + const tableId = idOf(tdeck, 0, (e) => e.type === "table"); + const plan: EditPlan = { + slides: [ + { + source: { slideIndex: 1 }, + edits: [ + { + op: "setTableData", + elementId: tableId, + rows: [ + ["Region", "Sales"], + ["EMEA", "100"], + ], + }, + ], + }, + ], + }; + const out = await applyEdits(tableSource, plan); + const zip = await loadZip(out); + const s1 = await zip.file("ppt/slides/slide1.xml")!.async("string"); + expect(s1).toContain("Region"); + expect(s1).toContain("EMEA"); + expect(s1).toContain(""); + expect(s1).not.toContain("H1"); + await assertNoDanglingRels(zip); + }); + + it("adds a new native chart into a region", async () => { + const source = await buildTemplate(); + const plan: EditPlan = { + slides: [ + { + source: { slideIndex: 1 }, + edits: [ + { + op: "addChart", + bounds: { x: 100, y: 100, w: 600, h: 400 }, + kind: "column", + categories: ["A", "B"], + series: [{ name: "S", values: [3, 4] }], + }, + ], + }, + ], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + const s1 = await zip.file("ppt/slides/slide1.xml")!.async("string"); + expect(s1).toContain("graphicData"); + // A chart part was written + content-typed. 
+ const ct = await zip.file("[Content_Types].xml")!.async("string"); + expect(ct).toContain("drawingml.chart+xml"); + await assertNoDanglingRels(zip); + }); + + it("replaces an image's bytes", async () => { + const source = await buildTemplate(); + const deck = await parsePptx(source); + const imgId = idOf(deck, 3, (e) => e.type === "image"); + const newPng = Uint8Array.from([...ONE_PX_PNG]); + newPng[20] = 0x02; // perturb so bytes differ + const plan: EditPlan = { + slides: [{ source: { slideIndex: 4 }, edits: [{ op: "setImage", elementId: imgId, data: newPng }] }], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + await assertNoDanglingRels(zip); + // The blip now points at a fresh media part holding the new bytes. + const s4 = await zip.file("ppt/slides/slide4.xml")!.async("string"); + const embed = /]*\br:embed="([^"]+)"/.exec(s4)![1]; + const rels = await zip.file("ppt/slides/_rels/slide4.xml.rels")!.async("string"); + const target = new RegExp(`Id="${embed}"[^>]*Target="([^"]+)"`).exec(rels)![1]; + const mediaPath = normalise(target, "ppt/slides"); + const bytes = await zip.file(mediaPath)!.async("uint8array"); + expect(bytes[20]).toBe(0x02); + }); + + it("applies a solid background and 'transparent' inheritance", async () => { + const source = await buildTemplate(); + const plan: EditPlan = { + slides: [ + { source: { slideIndex: 1 }, background: "#123456", edits: [] }, + { source: { slideIndex: 4 }, background: "transparent", edits: [] }, + ], + }; + const out = await applyEdits(source, plan); + const zip = await loadZip(out); + expect(await zip.file("ppt/slides/slide1.xml")!.async("string")).toContain("123456"); + expect(await zip.file("ppt/slides/slide4.xml")!.async("string")).not.toContain(""); + }); + + it("surfaces a warning for an unresolved element id instead of throwing", async () => { + const source = await buildTemplate(); + const warnings: SerializeWarning[] = []; + const plan: EditPlan = { + slides: [{ 
source: { slideIndex: 1 }, edits: [{ op: "setText", elementId: "nope", text: "x" }] }], + }; + const out = await applyEdits(source, plan, { onWarning: (w) => warnings.push(w) }); + expect(out.length).toBeGreaterThan(0); + expect(warnings.some((w) => w.code === "element-write-failed")).toBe(true); + }); + + it("warns (does not crash) for unsupported layout instantiation", async () => { + const source = await buildTemplate(); + const warnings: SerializeWarning[] = []; + const plan: EditPlan = { + slides: [ + { source: { slideIndex: 1 }, edits: [] }, + { source: { layoutId: "layout-x" }, edits: [] }, + ], + }; + const out = await applyEdits(source, plan, { onWarning: (w) => warnings.push(w) }); + const zip = await loadZip(out); + expect(warnings.some((w) => w.code === "layout-unresolved")).toBe(true); + // The supported slide still ships. + expect(zip.file("ppt/slides/slide1.xml")).not.toBeNull(); + await assertNoDanglingRels(zip); + }); +}); diff --git a/packages/slidewise/src/lib/pptx/applyEdits.ts b/packages/slidewise/src/lib/pptx/applyEdits.ts new file mode 100644 index 0000000..3efa43c --- /dev/null +++ b/packages/slidewise/src/lib/pptx/applyEdits.ts @@ -0,0 +1,1298 @@ +/** + * `applyEdits` — lossless surgical-edit API. + * + * Where `serializeDeck` rebuilds an entire `.pptx` from a deck JSON (the lossy + * round-trip every fidelity bug comes from), `applyEdits` treats an edit as a + * **patch on the original bytes**: it unzips the source, copies every part + * verbatim, and mutates ONLY the slide XML / chart / media parts named by an + * edit. Untouched slides, masters, layouts, theme, fonts, tags, notes and + * embeddings come out byte-for-byte identical to the source. + * + * Elements are addressed by the same stable ids `parsePptx` returns + * (`deck.slides[i].elements[j].id`). The host flow is: + * `parsePptx(source)` → plan edits against the parsed JSON → + * `applyEdits(source, plan)`. 
+ * Slides are addressed by 1-based source-slide index, since a plan is + * expressed relative to the template. + * + * See `.context/attachments/.../applyEdits` spec for the full contract. + */ +import JSZip from "jszip"; + +import type { + ChartElement, + ChartKind, + DiagramElement, + DiagramKind, + DiagramNode, + TextRun, +} from "../types"; +import { getElementLocation } from "./pptxToDeck"; +import { reconcileDanglingRels, type SerializeWarning } from "./deckToPptx"; +import { + synthesiseChart, + synthesiseDiagram, + RID_MARKER_RE, + hexBare, +} from "./pptxWriters"; +// ---------------------------------------------------------------------------- +// Public types +// ---------------------------------------------------------------------------- + +/** A bounding box in Slidewise canvas pixels (the same unit elements use). */ +export interface Rect { + x: number; + y: number; + w: number; + h: number; +} + +/** A chart series — mirrors `ChartElement["series"][number]`. */ +export interface Series { + name: string; + values: (number | null)[]; + color?: string; +} + +/** A run of styled text — an alias of {@link TextRun}. */ +export type Run = TextRun; + +export type Edit = + // TEXT — replace a slot's text. `runs` applies emphasis while preserving the + // box; omit `runs` to keep the template's run styling and just swap text. + | { op: "setText"; elementId: string; text: string; runs?: Run[] } + // CLEAR — blank a leftover sample/placeholder slot. + | { op: "clearText"; elementId: string } + // CHART (fill existing) — repopulate a native template chart with real data, + // keeping its type/colors/embedded workbook. The lossless path we most want. + | { + op: "setChartData"; + elementId: string; + categories: string[]; + series: Series[]; + } + // CHART (add) — draw a NEW native chart into a region. 
+ | { + op: "addChart"; + bounds: Rect; + kind: ChartKind; + categories: string[]; + series: Series[]; + palette?: string[]; + title?: string; + } + // DIAGRAM — a first-class diagram drawn into a region. + | { + op: "addDiagram"; + bounds: Rect; + kind: DiagramKind; + nodes: DiagramNode[]; + palette?: string[]; + } + // TABLE — repopulate a native template table, keeping layout/borders/fills. + | { op: "setTableData"; elementId: string; rows: string[][]; hasHeader?: boolean } + // IMAGE — replace an image element's bytes honoring fit. + | { + op: "setImage"; + elementId: string; + data: Uint8Array | string; + fit?: "contain" | "cover"; + } + // REMOVE — delete an element entirely. + | { op: "removeElement"; elementId: string }; + +export interface PlannedSlide { + /** EXACTLY ONE source. */ + source: + | { slideIndex: number } // clone template slide N (1-based) verbatim, then edit + | { layoutId: string; fills?: Record }; // instantiate from a layout + /** Optional; "transparent" => inherit layout chrome; else a CSS colour. */ + background?: "transparent" | string; + edits: Edit[]; +} + +export interface EditPlan { + /** Deck title (also written to docProps). */ + title?: string; + /** Output order; this defines the final slide set + order. */ + slides: PlannedSlide[]; +} + +export interface ApplyEditsOptions { + onWarning?: (warning: SerializeWarning) => void; +} + +// ---------------------------------------------------------------------------- +// Entry point +// ---------------------------------------------------------------------------- + +export async function applyEdits( + source: Uint8Array, + plan: EditPlan, + options: ApplyEditsOptions = {} +): Promise { + const warn = (w: SerializeWarning) => options.onWarning?.(w); + const zip = await JSZip.loadAsync(source); + // A second, read-only view of the source. Clones (controlled slide reuse) + // copy from here so they never pick up edits already applied in-place to a + // first-use slide part. 
+ const pristine = await JSZip.loadAsync(source); + + const presPath = "ppt/presentation.xml"; + const presRelsPath = "ppt/_rels/presentation.xml.rels"; + const presXml = await readText(zip, presPath); + const presRelsXml = await readText(zip, presRelsPath); + if (!presXml || !presRelsXml) { + // Not a presentation package we understand — return the input untouched. + warn({ code: "element-write-failed", message: "missing ppt/presentation.xml" }); + return source; + } + + // 1-based source slide index -> source part path (presentation order). + const sourceSlidePaths = resolveSlideOrder(presXml, presRelsXml); + + // Non-slide relationships in presentation.xml.rels are kept verbatim; slide + // relationships are rebuilt from the plan. + const presRels = parseRels(presRelsXml); + const keptPresRels = presRels.filter( + (r) => relTypeSuffix(r.type) !== "slide" + ); + let ridCounter = highestRidNumber(presRels); + const nextPresRid = () => `rId${++ridCounter}`; + + // Build the output slide set in plan order. + const usedSourcePaths = new Set(); + const outputSlides: { partPath: string; presRid: string }[] = []; + + for (let i = 0; i < plan.slides.length; i++) { + const planned = plan.slides[i]; + const built = await buildOutputSlide( + zip, + pristine, + planned, + i, + sourceSlidePaths, + usedSourcePaths, + warn + ); + if (!built) continue; + outputSlides.push({ partPath: built, presRid: nextPresRid() }); + } + + if (!outputSlides.length) { + warn({ + code: "element-write-failed", + message: "plan produced no slides; returning source unchanged", + }); + return source; + } + + // Rewrite presentation.xml.rels: kept non-slide rels + one slide rel each. 
+ const newPresRels = [ + ...keptPresRels, + ...outputSlides.map((s) => ({ + id: s.presRid, + type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide", + target: relativeTo(s.partPath, "ppt"), + mode: undefined as string | undefined, + })), + ]; + zip.file(presRelsPath, serializeRels(newPresRels)); + + // Rewrite <p:sldIdLst> in presentation.xml to the new order. + zip.file(presPath, rewriteSldIdLst(presXml, outputSlides.map((s) => s.presRid))); + + // Deck title -> docProps/core.xml. + if (plan.title != null) await setDocTitle(zip, plan.title); + + // Drop source slides (and their now-orphaned exclusive parts) that the plan + // didn't keep, then repair any rels left dangling by element removals. + await garbageCollect(zip); + await reconcileDanglingRels(zip); + await pruneDanglingContentTypes(zip); + + return zip.generateAsync({ type: "uint8array" }); +} + +// ---------------------------------------------------------------------------- +// Slide selection / cloning +// ---------------------------------------------------------------------------- + +/** + * Materialise one planned slide into a concrete output part path, applying its + * edits. The first use of a source slide keeps the original part (so untouched + * slides stay byte-identical); a repeat clones the part + its editable deps so + * the two copies can diverge. + */ +async function buildOutputSlide( + zip: JSZip, + pristine: JSZip, + planned: PlannedSlide, + outIndex: number, + sourceSlidePaths: string[], + used: Set, + warn: (w: SerializeWarning) => void +): Promise { + if (!("slideIndex" in planned.source)) { + // Layout instantiation isn't part of the lossless patch path — that's the + // from-scratch `serializeDeck` flow's job. Surface it instead of silently + // emitting a wrong slide. 
+ warn({ + code: "layout-unresolved", + message: + `slide ${outIndex + 1}: instantiating from layout "${planned.source.layoutId}" ` + + "is not supported by applyEdits; use serializeDeck for layout-from-scratch slides", + layoutId: planned.source.layoutId, + slideIndex: outIndex, + }); + return null; + } + + const idx1 = planned.source.slideIndex; + const srcPath = sourceSlidePaths[idx1 - 1]; + if (!srcPath) { + warn({ + code: "element-write-failed", + message: `slide ${outIndex + 1}: source slideIndex ${idx1} out of range`, + slideIndex: outIndex, + }); + return null; + } + + let partPath = srcPath; + if (used.has(srcPath)) { + partPath = await cloneSlideDeep(zip, pristine, srcPath); + } + used.add(srcPath); + + await applySlideEdits(zip, partPath, planned, outIndex, warn); + return partPath; +} + +/** + * Deep-clone a slide part: copy the slide XML, its `.rels`, and every + * non-chrome dependency (charts + workbooks, media, notes) to fresh paths, + * rewriting the slide's rels to the copies. Shared chrome (layout/master/theme) + * keeps pointing at the originals. Used for controlled slide reuse so edits to + * one copy never bleed into another. + */ +async function cloneSlideDeep(zip: JSZip, pristine: JSZip, srcPath: string): Promise { + const newSlidePath = freshPartPath(zip, "ppt/slides", "slide", "xml"); + const slideXml = (await readText(pristine, srcPath)) ?? ""; + + const srcRelsXml = await readText(pristine, relsPathFor(srcPath)); + const rels = srcRelsXml ? parseRels(srcRelsXml) : []; + const srcDir = dirOf(srcPath); + + const cloned: typeof rels = []; + for (const rel of rels) { + if (rel.mode === "External" || /^https?:\/\//i.test(rel.target)) { + cloned.push(rel); + continue; + } + const suffix = relTypeSuffix(rel.type); + // Shared, read-only chrome — keep pointing at the original part. 
+ if (suffix === "slideLayout" || suffix === "slideMaster" || suffix === "theme") { + cloned.push(rel); + continue; + } + const targetFull = normalisePath(rel.target, srcDir); + const copyFull = await copyPartTree(zip, pristine, targetFull); + cloned.push({ ...rel, target: relativeTo(copyFull, srcDir) }); + } + + zip.file(newSlidePath, slideXml); + zip.file(relsPathFor(newSlidePath), serializeRels(cloned)); + await ensureOverride(zip, "/" + newSlidePath, await overrideTypeFor(zip, "/" + srcPath)); + return newSlidePath; +} + +/** + * Copy a part and (recursively) the parts it references to fresh unique paths, + * returning the new path of the root. Each copied part's `.rels` is rewritten + * to point at the copies. Idempotent within a single call chain via `seen`. + */ +async function copyPartTree( + zip: JSZip, + pristine: JSZip, + partPath: string, + seen = new Map() +): Promise { + const existing = seen.get(partPath); + if (existing) return existing; + + const file = pristine.file(partPath); + if (!file) return partPath; // missing — leave the reference for reconcile. + + const dir = dirOf(partPath); + const base = baseOf(partPath); + const dot = base.lastIndexOf("."); + const stem = dot >= 0 ? base.slice(0, dot) : base; + const ext = dot >= 0 ? 
base.slice(dot + 1) : "bin"; + const newPath = freshPartPath(zip, dir, `${stem}_sw`, ext); + seen.set(partPath, newPath); + + const data = await file.async("uint8array"); + zip.file(newPath, data); + await ensureOverride(zip, "/" + newPath, await overrideTypeFor(zip, "/" + partPath)); + + const relsXml = await readText(pristine, relsPathFor(partPath)); + if (relsXml) { + const rels = parseRels(relsXml); + const rewritten = [] as typeof rels; + for (const rel of rels) { + if (rel.mode === "External" || /^https?:\/\//i.test(rel.target)) { + rewritten.push(rel); + continue; + } + const childFull = normalisePath(rel.target, dir); + const childCopy = await copyPartTree(zip, pristine, childFull, seen); + rewritten.push({ ...rel, target: relativeTo(childCopy, dir) }); + } + zip.file(relsPathFor(newPath), serializeRels(rewritten)); + } + return newPath; +} + +// ---------------------------------------------------------------------------- +// Edit application +// ---------------------------------------------------------------------------- + +async function applySlideEdits( + zip: JSZip, + slidePath: string, + planned: PlannedSlide, + outIndex: number, + warn: (w: SerializeWarning) => void +): Promise { + let slideXml = (await readText(zip, slidePath)) ?? ""; + const relsPath = relsPathFor(slidePath); + let rels = parseRels((await readText(zip, relsPath)) ?? 
EMPTY_RELS); + let relsDirty = false; + + const slideDir = dirOf(slidePath); + const nextRid = () => { + const id = `rId${highestRidNumber(rels) + 1}`; + return id; + }; + + for (const edit of planned.edits) { + try { + switch (edit.op) { + case "setText": + slideXml = editSetText(slideXml, edit.elementId, edit.text, edit.runs, slidePath, outIndex, warn); + break; + case "clearText": + slideXml = editSetText(slideXml, edit.elementId, "", undefined, slidePath, outIndex, warn); + break; + case "removeElement": { + const r = editRemoveElement(slideXml, edit.elementId, rels, slidePath, outIndex, warn); + slideXml = r.slideXml; + if (r.relsChanged) relsDirty = true; + break; + } + case "setTableData": + slideXml = editSetTableData(slideXml, edit.elementId, edit.rows, slidePath, outIndex, warn); + break; + case "setChartData": + await editSetChartData(zip, slideXml, edit, rels, slideDir, slidePath, outIndex, warn); + break; + case "setImage": { + const rid = nextRid(); + const r = editSetImage(slideXml, edit, rid, slidePath, outIndex, warn); + if (r) { + slideXml = r.slideXml; + zip.file(r.media.fullPath, r.media.data); + await ensureDefault(zip, r.media.ext, r.media.contentType); + rels.push({ id: rid, type: IMAGE_REL_TYPE, target: r.media.relTarget, mode: undefined }); + relsDirty = true; + } + break; + } + case "addChart": { + const rid = nextRid(); + const r = await editAddChart(zip, slideXml, edit, rid); + slideXml = r.slideXml; + rels.push({ id: rid, type: CHART_REL_TYPE, target: r.relTarget, mode: undefined }); + relsDirty = true; + break; + } + case "addDiagram": + slideXml = editAddDiagram(slideXml, edit); + break; + } + } catch (err) { + warn({ + code: "element-write-failed", + message: `slide ${outIndex + 1}: edit "${edit.op}" failed: ${(err as Error).message}`, + elementId: "elementId" in edit ? 
edit.elementId : undefined, + slideIndex: outIndex, + }); + } + } + + if (planned.background !== undefined) { + slideXml = applyBackground(slideXml, planned.background); + } + + zip.file(slidePath, slideXml); + if (relsDirty) zip.file(relsPath, serializeRels(rels)); +} + +// -- text -------------------------------------------------------------------- + +function editSetText( + slideXml: string, + elementId: string, + text: string, + runs: Run[] | undefined, + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): string { + const block = locateBlock(slideXml, elementId, slidePath, outIndex, warn); + if (!block) return slideXml; + const next = rewriteTextBody(block, text, runs); + return slideXml.replace(block, next); +} + +/** + * Replace the visible text of a shape's ``. With `runs` omitted we + * keep the template's first run/paragraph styling and just swap the text; with + * `runs` we rebuild the paragraph from the supplied runs (bold/colour/etc). + */ +function rewriteTextBody(block: string, text: string, runs?: Run[]): string { + const txMatch = /([\s\S]*?)<\/p:txBody>/.exec(block); + const inner = txMatch?.[1] ?? ""; + const bodyPr = firstElement(inner, "a:bodyPr"); + const lstStyle = firstElement(inner, "a:lstStyle"); + const firstPPr = firstElement(inner, "a:pPr"); + const firstRPr = firstElement(inner, "a:rPr"); + + const head = (bodyPr ?? "") + (lstStyle ?? ""); + + let paras: string; + if (runs && runs.length) { + const runXml = runs.map((r) => runToXml(r)).join(""); + paras = `${firstPPr ?? ""}${runXml}`; + } else { + const lines = text.split("\n"); + paras = lines + .map((line) => { + if (line === "") return `${firstPPr ?? ""}`; + const rPr = firstRPr ?? ``; + return `${firstPPr ?? ""}${rPr}${escapeText(line)}`; + }) + .join(""); + } + const newTxBody = `${head}${paras}`; + + if (txMatch) return block.replace(txMatch[0], newTxBody); + // No existing txBody (e.g. an empty autoshape) — append one before . 
+ return block.replace(/<\/p:sp>\s*$/, `${newTxBody}`); +} + +function runToXml(r: Run): string { + const attrs: string[] = [`lang="en-US"`]; + if (r.fontSize != null) attrs.push(`sz="${Math.round(r.fontSize * 100)}"`); + if (r.fontWeight != null) attrs.push(`b="${r.fontWeight >= 600 ? 1 : 0}"`); + if (r.italic) attrs.push(`i="1"`); + if (r.underline) attrs.push(`u="sng"`); + if (r.strike) attrs.push(`strike="sngStrike"`); + const kids: string[] = []; + if (r.color) kids.push(``); + if (r.fontFamily) kids.push(``); + const rPr = kids.length + ? `${kids.join("")}` + : ``; + const textParts = (r.text ?? "").split("\n"); + return textParts + .map((t, i) => (i === 0 ? "" : "") + `${rPr}${escapeText(t)}`) + .join(""); +} + +// -- remove ------------------------------------------------------------------ + +function editRemoveElement( + slideXml: string, + elementId: string, + rels: Rel[], + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): { slideXml: string; relsChanged: boolean } { + const block = locateBlock(slideXml, elementId, slidePath, outIndex, warn); + if (!block) return { slideXml, relsChanged: false }; + const without = slideXml.replace(block, ""); + + // Drop slide rels referenced only by the removed block (the part itself is + // reclaimed later by garbageCollect). 
+ const rids = [...block.matchAll(/r:(?:id|embed|link)="([^"]+)"/g)].map((m) => m[1]); + let relsChanged = false; + for (const rid of new Set(rids)) { + if (!new RegExp(`"${rid}"`).test(without)) { + const i = rels.findIndex((r) => r.id === rid); + if (i >= 0) { + rels.splice(i, 1); + relsChanged = true; + } + } + } + return { slideXml: without, relsChanged }; +} + +// -- table ------------------------------------------------------------------- + +function editSetTableData( + slideXml: string, + elementId: string, + rows: string[][], + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): string { + const block = locateBlock(slideXml, elementId, slidePath, outIndex, warn); + if (!block) return slideXml; + const tblMatch = /[\s\S]*<\/a:tbl>/.exec(block); + if (!tblMatch) { + warn({ code: "element-write-failed", message: `setTableData: element ${elementId} is not a table`, elementId, slideIndex: outIndex }); + return slideXml; + } + const trRe = //g; + let r = 0; + const newTbl = tblMatch[0].replace(trRe, (tr) => { + const rowData = rows[r++]; + if (!rowData) return tr; + let c = 0; + return tr.replace(//g, (tc) => { + const val = rowData[c++]; + if (val == null) return tc; + return rewriteTableCell(tc, val); + }); + }); + const nextBlock = block.replace(tblMatch[0], newTbl); + return slideXml.replace(block, nextBlock); +} + +function rewriteTableCell(tc: string, text: string): string { + const txMatch = /([\s\S]*?)<\/a:txBody>/.exec(tc); + if (!txMatch) return tc; + const inner = txMatch[1]; + const bodyPr = firstElement(inner, "a:bodyPr") ?? ""; + const lstStyle = firstElement(inner, "a:lstStyle") ?? ""; + const firstPPr = firstElement(inner, "a:pPr") ?? ""; + const firstRPr = firstElement(inner, "a:rPr") ?? ``; + const lines = text.split("\n"); + const paras = lines + .map((line) => + line === "" + ? 
`${firstPPr}` + : `${firstPPr}${firstRPr}${escapeText(line)}` + ) + .join(""); + return tc.replace(txMatch[0], `${bodyPr}${lstStyle}${paras}`); +} + +// -- chart (fill existing) --------------------------------------------------- + +async function editSetChartData( + zip: JSZip, + slideXml: string, + edit: Extract, + slideRels: Rel[], + slideDir: string, + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): Promise { + const block = locateBlock(slideXml, edit.elementId, slidePath, outIndex, warn); + if (!block) return; + const chartRid = /]*\br:id="([^"]+)"/.exec(block)?.[1]; + const rel = chartRid ? slideRels.find((r) => r.id === chartRid) : undefined; + if (!rel) { + warn({ code: "element-write-failed", message: `setChartData: element ${edit.elementId} has no chart relationship`, elementId: edit.elementId, slideIndex: outIndex }); + return; + } + const chartPath = normalisePath(rel.target, slideDir); + const chartXml = await readText(zip, chartPath); + if (!chartXml) { + warn({ code: "element-write-failed", message: `setChartData: chart part ${chartPath} missing`, elementId: edit.elementId, slideIndex: outIndex }); + return; + } + + const updated = rewriteChartSeries(chartXml, edit.categories, edit.series); + + // Regenerate the embedded workbook so Edit-Data reflects the new data, and + // make sure the chart references it via . + const chartRelsPath = relsPathFor(chartPath); + const chartDir = dirOf(chartPath); + const chartRels = parseRels((await readText(zip, chartRelsPath)) ?? EMPTY_RELS); + const wbRel = chartRels.find((r) => relTypeSuffix(r.type) === "package"); + const workbook = await buildChartWorkbook(edit.categories, edit.series); + + let finalChartXml = updated; + if (wbRel) { + // Overwrite the existing embedded workbook in place — the chart already + // references it via , so caches + workbook stay in sync. 
+ zip.file(normalisePath(wbRel.target, chartDir), workbook); + } else { + const wbName = baseOf(freshPartPath(zip, "ppt/embeddings", "MicrosoftSWWorkbook", "xlsx")); + const wbPath = `ppt/embeddings/${wbName}`; + zip.file(wbPath, workbook); + await ensureOverride(zip, "/" + wbPath, XLSX_CONTENT_TYPE); + const wbRid = `rId${highestRidNumber(chartRels) + 1}`; + chartRels.push({ id: wbRid, type: PACKAGE_REL_TYPE, target: relativeTo(wbPath, chartDir), mode: undefined }); + zip.file(chartRelsPath, serializeRels(chartRels)); + finalChartXml = ensureExternalData(updated, wbRid); + } + zip.file(chartPath, finalChartXml); +} + +/** + * Replace each series' ``/``/`` with the new categories + + * values while leaving the chart type, colours (``), data-label and + * marker settings untouched. Series are matched by position; extras are dropped + * and missing ones cloned from the first series as a template. + */ +function rewriteChartSeries(chartXml: string, categories: string[], series: Series[]): string { + const serRe = /[\s\S]*?<\/c:ser>/g; + const existing = chartXml.match(serRe) ?? []; + if (!existing.length) return chartXml; + + const template = existing[0]; + const firstSer = existing[0]; + const lastSer = existing[existing.length - 1]; + if (!firstSer || !lastSer) return chartXml; + const built: string[] = series.map((s, i) => { + const base = existing[i] ?? template; + return buildSer(base, i, categories, s); + }); + + // Splice the new series list in place of the old one. 
+ const firstIdx = chartXml.indexOf(firstSer); + const lastIdx = chartXml.indexOf(lastSer) + lastSer.length; + return chartXml.slice(0, firstIdx) + built.join("") + chartXml.slice(lastIdx); +} + +function buildSer(template: string, idx: number, categories: string[], s: Series): string { + let ser = template; + // idx / order + ser = ser.replace(//, ``); + ser = ser.replace(//, ``); + + const col = colLetter(idx + 1); // A reserved for categories + const n = categories.length; + const tx = `Sheet1!$${col}$1${escapeText(s.name || `Series ${idx + 1}`)}`; + const catPts = categories.map((c, i) => `${escapeText(String(c))}`).join(""); + const cat = `Sheet1!$A$2:$A$${n + 1}${catPts}`; + const valPts = s.values.map((v, i) => (v == null ? "" : `${v}`)).join(""); + const val = `Sheet1!$${col}$2:$${col}$${n + 1}General${valPts}`; + + if (/[\s\S]*?<\/c:tx>/.test(ser)) ser = ser.replace(/[\s\S]*?<\/c:tx>/, tx); + else ser = ser.replace(/()/, `$1${tx}`); + + if (/[\s\S]*?<\/c:cat>/.test(ser)) ser = ser.replace(/[\s\S]*?<\/c:cat>/, cat); + else ser = ser.replace(/()/, `${cat}$1`); + + if (/[\s\S]*?<\/c:val>/.test(ser)) ser = ser.replace(/[\s\S]*?<\/c:val>/, val); + else ser = ser.replace(/(<\/c:ser>)/, `${val}$1`); + + // Apply an explicit series colour when supplied (keeps template colour if not). 
+ if (s.color) { + const spPr = ``; + if (/[\s\S]*?<\/c:spPr>/.test(ser)) ser = ser.replace(/[\s\S]*?<\/c:spPr>/, spPr); + else ser = ser.replace(/([\s\S]*?<\/c:tx>)/, `$1${spPr}`); + } + return ser; +} + +function ensureExternalData(chartXml: string, rid: string): string { + if (/`; + return chartXml.replace(/<\/c:chartSpace>/, `${ext}`); +} + +// -- image ------------------------------------------------------------------- + +function editSetImage( + slideXml: string, + edit: Extract, + newRid: string, + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): { slideXml: string; media: DecodedMedia } | null { + const block = locateBlock(slideXml, edit.elementId, slidePath, outIndex, warn); + if (!block) return null; + const blipMatch = /]*\br:embed="([^"]+)"/.exec(block); + if (!blipMatch) { + warn({ code: "element-write-failed", message: `setImage: element ${edit.elementId} has no image blip`, elementId: edit.elementId, slideIndex: outIndex }); + return null; + } + const media = decodeMedia(edit.data, edit.elementId); + // Repoint the blip at a fresh media part rather than overwriting bytes in + // place — the old media is reclaimed by garbageCollect if nothing else uses + // it, and a new extension/content-type stays correct. + const newBlock = block.replace( + new RegExp(`(]*\\br:embed=")${escapeRegExp(blipMatch[1])}(")`), + `$1${newRid}$2` + ); + return { slideXml: slideXml.replace(block, newBlock), media }; +} + +// -- add chart / diagram ----------------------------------------------------- + +async function editAddChart( + zip: JSZip, + slideXml: string, + edit: Extract, + rid: string +): Promise<{ slideXml: string; relTarget: string }> { + const el: ChartElement = { + id: `add_${rid}`, + type: "chart", + x: edit.bounds.x, + y: edit.bounds.y, + w: edit.bounds.w, + h: edit.bounds.h, + rotation: 0, + z: 0, + kind: edit.kind, + categories: edit.categories, + series: edit.series, + ...(edit.title ? 
{ title: edit.title } : {}), + }; + const synth = synthesiseChart(el); + const partPath = freshSynthPath(zip, synth.partPath); + const partRelsPath = relsPathFor(partPath); + zip.file(partPath, synth.chartXml); + zip.file(partRelsPath, synth.chartRelsXml); + await ensureOverride(zip, "/" + partPath, CHART_CONTENT_TYPE); + + const graphicFrame = synth.graphicFrameXml.replace(RID_MARKER_RE, rid); + const next = spliceIntoSpTree(slideXml, graphicFrame); + return { slideXml: next, relTarget: relativeTo(partPath, "ppt/slides") }; +} + +function editAddDiagram(slideXml: string, edit: Extract): string { + const el: DiagramElement = { + id: `dgm_${Math.abs(hashString(JSON.stringify(edit.nodes))).toString(36)}`, + type: "diagram", + x: edit.bounds.x, + y: edit.bounds.y, + w: edit.bounds.w, + h: edit.bounds.h, + rotation: 0, + z: 0, + kind: edit.kind, + nodes: edit.nodes, + ...(edit.palette ? { palette: edit.palette } : {}), + }; + const synth = synthesiseDiagram(el); + return spliceIntoSpTree(slideXml, synth.xml); +} + +function spliceIntoSpTree(slideXml: string, fragment: string): string { + return slideXml.replace(/<\/p:spTree>/, `${fragment}`); +} + +// -- background -------------------------------------------------------------- + +function applyBackground(slideXml: string, background: string): string { + // Strip any existing first. + let xml = slideXml.replace(/[\s\S]*?<\/p:bg>/, ""); + if (background === "transparent") return xml; // inherit layout chrome + const bg = ``; + // must be the first child of , before . + return xml.replace(/()/, `$1${bg}`); +} + +// ---------------------------------------------------------------------------- +// Package-level helpers +// ---------------------------------------------------------------------------- + +/** Resolve presentation order to 1-based-indexed source slide part paths. 
*/ +function resolveSlideOrder(presXml: string, presRelsXml: string): string[] { + const rels = parseRels(presRelsXml); + const byId = new Map(rels.map((r) => [r.id, r])); + const out: string[] = []; + for (const m of presXml.matchAll(/]*>/g)) { + const rid = /\br:id="([^"]+)"/.exec(m[0])?.[1]; + if (!rid) continue; + const rel = byId.get(rid); + if (!rel) continue; + out.push(normalisePath(rel.target, "ppt")); + } + return out; +} + +/** Rewrite with one per output slide rId, in order. */ +function rewriteSldIdLst(presXml: string, rids: string[]): string { + let id = 256; + const entries = rids.map((rid) => ``).join(""); + const lst = `${entries}`; + if (//.test(presXml)) return presXml.replace(//, lst); + if (/[\s\S]*?<\/p:sldIdLst>/.test(presXml)) + return presXml.replace(/[\s\S]*?<\/p:sldIdLst>/, lst); + // No list yet — insert right after . + return presXml.replace(/(]*>)/, `$1${lst}`); +} + +async function setDocTitle(zip: JSZip, title: string): Promise { + const path = "docProps/core.xml"; + const xml = await readText(zip, path); + if (!xml) return; // no core props part — leave it; not worth synthesising. + let next: string; + if (/[\s\S]*?<\/dc:title>/.test(xml)) { + next = xml.replace(/[\s\S]*?<\/dc:title>/, `${escapeText(title)}`); + } else { + next = xml.replace(/(]*>)/, `$1${escapeText(title)}`); + } + zip.file(path, next); +} + +/** + * Reclaim parts no longer reachable from the package root relationships. After + * the plan drops source slides, this removes those slide parts and any media / + * charts / notes that were exclusive to them — no per-rel-type special-casing. 
+ */ +async function garbageCollect(zip: JSZip): Promise { + const present = new Set(); + zip.forEach((p, e) => { + if (!e.dir) present.add(p); + }); + + const reachable = new Set(); + const queue: string[] = []; + const visit = async (relsPath: string, ownerDir: string) => { + const xml = await readText(zip, relsPath); + if (!xml) return; + for (const rel of parseRels(xml)) { + if (rel.mode === "External" || /^https?:\/\//i.test(rel.target)) continue; + const full = normalisePath(rel.target, ownerDir); + if (present.has(full) && !reachable.has(full)) { + reachable.add(full); + queue.push(full); + } + } + }; + + await visit("_rels/.rels", ""); + while (queue.length) { + const part = queue.shift()!; + await visit(relsPathFor(part), dirOf(part)); + } + + for (const p of present) { + if (p === "[Content_Types].xml") continue; + if (p === "_rels/.rels") continue; // mandatory package root rels + if (p.endsWith(".rels")) { + // Keep a .rels iff the part it describes is reachable. + const owner = p.replace(/(^|\/)_rels\/([^/]+)\.rels$/, "$1$2"); + if (reachable.has(owner)) continue; + zip.remove(p); + continue; + } + if (!reachable.has(p)) zip.remove(p); + } +} + +async function pruneDanglingContentTypes(zip: JSZip): Promise { + const path = "[Content_Types].xml"; + const xml = await readText(zip, path); + if (!xml) return; + const present = new Set(); + zip.forEach((p, e) => { + if (!e.dir) present.add("/" + p); + }); + let changed = false; + const next = xml.replace(/]*\/>/g, (tag) => { + const part = /\bPartName="([^"]+)"/.exec(tag)?.[1]; + if (part && !present.has(part)) { + changed = true; + return ""; + } + return tag; + }); + if (changed) zip.file(path, next); +} + +// -- content types ----------------------------------------------------------- + +async function ensureOverride(zip: JSZip, partName: string, contentType: string | null): Promise { + if (!contentType) return; + const path = "[Content_Types].xml"; + const xml = await readText(zip, path); + if (!xml) 
return; + if (new RegExp(`PartName="${escapeRegExp(partName)}"`).test(xml)) return; + const override = ``; + zip.file(path, xml.replace(/<\/Types>/, `${override}`)); +} + +async function ensureDefault(zip: JSZip, ext: string, contentType: string): Promise { + const path = "[Content_Types].xml"; + const xml = await readText(zip, path); + if (!xml) return; + if (new RegExp(`Extension="${escapeRegExp(ext)}"`, "i").test(xml)) return; + const def = ``; + zip.file(path, xml.replace(/(]*>)/, `$1${def}`)); +} + +async function overrideTypeFor(zip: JSZip, partName: string): Promise { + const xml = await readText(zip, "[Content_Types].xml"); + if (!xml) return null; + const m = new RegExp(`]*PartName="${escapeRegExp(partName)}"[^>]*>`).exec(xml); + return m ? /\bContentType="([^"]+)"/.exec(m[0])?.[1] ?? null : null; +} + +// ---------------------------------------------------------------------------- +// Element location +// ---------------------------------------------------------------------------- + +/** + * Find an element's verbatim XML block inside the slide XML. The block string + * comes from the parser's location registry (the exact substring it extracted + * from the source slide), so it is present verbatim in the source — and in any + * clone, since clones copy the slide XML byte-for-byte. 
+ */ +function locateBlock( + slideXml: string, + elementId: string, + slidePath: string, + outIndex: number, + warn: (w: SerializeWarning) => void +): string | null { + const loc = getElementLocation(elementId); + if (!loc) { + warn({ + code: "element-write-failed", + message: `element ${elementId} not found (was the source parsed in this process?)`, + elementId, + slideIndex: outIndex, + }); + return null; + } + if (slideXml.includes(loc.xml)) return loc.xml; + warn({ + code: "element-write-failed", + message: `element ${elementId} not present in ${slidePath}`, + elementId, + slideIndex: outIndex, + }); + return null; +} + +// ---------------------------------------------------------------------------- +// Embedded-workbook generation +// ---------------------------------------------------------------------------- + +/** Build a minimal valid `.xlsx` mirroring the chart's categories + series so + * PowerPoint's Edit-Data shows the real numbers. Layout is canonical: + * column A = categories (rows 2..n+1), series i in column B+i (header row 1). */ +async function buildChartWorkbook(categories: string[], series: Series[]): Promise { + const n = categories.length; + const rows: string[] = []; + // Header row. + const headerCells = series + .map((s, i) => inlineStrCell(`${colLetter(i + 1)}1`, s.name || `Series ${i + 1}`)) + .join(""); + rows.push(`${headerCells}`); + // Data rows. 
+ for (let r = 0; r < n; r++) { + const cells: string[] = [inlineStrCell(`A${r + 2}`, String(categories[r]))]; + series.forEach((s, i) => { + const v = s.values[r]; + if (v != null) cells.push(`${v}`); + }); + rows.push(`${cells.join("")}`); + } + + const sheet = + `` + + `` + + `${rows.join("")}`; + + const wb = new JSZip(); + wb.file( + "[Content_Types].xml", + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + ); + wb.file( + "_rels/.rels", + `` + + `` + + `` + + `` + ); + wb.file( + "xl/workbook.xml", + `` + + `` + + `` + ); + wb.file( + "xl/_rels/workbook.xml.rels", + `` + + `` + + `` + + `` + + `` + ); + wb.file("xl/worksheets/sheet1.xml", sheet); + wb.file( + "xl/styles.xml", + `` + + `` + + `` + + `` + + `` + + `` + + `` + + `` + ); + return wb.generateAsync({ type: "uint8array" }); +} + +function inlineStrCell(ref: string, text: string): string { + return `${escapeText(text)}`; +} + +// ---------------------------------------------------------------------------- +// Media decoding +// ---------------------------------------------------------------------------- + +interface DecodedMedia { + fullPath: string; + relTarget: string; + data: Uint8Array; + ext: string; + contentType: string; +} + +function decodeMedia(data: Uint8Array | string, scope: string): DecodedMedia { + let bytes: Uint8Array; + let mime = "image/png"; + if (typeof data === "string") { + const comma = data.indexOf(","); + const header = data.slice(0, comma); + mime = /^data:([^;,]+)/.exec(header)?.[1] ?? "image/png"; + bytes = header.includes(";base64") + ? decodeBase64(data.slice(comma + 1)) + : new TextEncoder().encode(decodeURIComponent(data.slice(comma + 1))); + } else { + bytes = data; + mime = sniffImageMime(data) ?? "image/png"; + } + const ext = mime.split("/")[1]?.split("+")[0] ?? 
"png"; + const safe = scope.replace(/[^a-zA-Z0-9]+/g, "_"); + return { + fullPath: `ppt/media/imageSW_${safe}.${ext}`, + relTarget: `../media/imageSW_${safe}.${ext}`, + data: bytes, + ext, + contentType: mime, + }; +} + +function sniffImageMime(b: Uint8Array): string | null { + if (b[0] === 0x89 && b[1] === 0x50) return "image/png"; + if (b[0] === 0xff && b[1] === 0xd8) return "image/jpeg"; + if (b[0] === 0x47 && b[1] === 0x49) return "image/gif"; + return null; +} + +function decodeBase64(b64: string): Uint8Array { + const clean = b64.replace(/\s+/g, ""); + if (typeof atob === "function") { + const bin = atob(clean); + const out = new Uint8Array(bin.length); + for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i); + return out; + } + const B = (globalThis as unknown as { Buffer?: { from(b: string, e: string): Uint8Array } }).Buffer; + if (B) return B.from(clean, "base64"); + throw new Error("[slidewise] no base64 decoder available"); +} + +// ---------------------------------------------------------------------------- +// Relationship + path utilities +// ---------------------------------------------------------------------------- + +interface Rel { + id: string; + type: string; + target: string; + mode: string | undefined; +} + +const EMPTY_RELS = + `` + + ``; + +function parseRels(xml: string): Rel[] { + const out: Rel[] = []; + for (const m of xml.matchAll(/]*\/?>/g)) { + const tag = m[0]; + const id = /\bId="([^"]+)"/.exec(tag)?.[1]; + const type = /\bType="([^"]+)"/.exec(tag)?.[1]; + const target = /\bTarget="([^"]+)"/.exec(tag)?.[1]; + const mode = /\bTargetMode="([^"]+)"/.exec(tag)?.[1]; + if (id && type && target) out.push({ id, type, target, mode }); + } + return out; +} + +function serializeRels(rels: Rel[]): string { + const body = rels + .map( + (r) => + `` + ) + .join(""); + return ( + `` + + `${body}` + ); +} + +function highestRidNumber(rels: Rel[]): number { + let max = 0; + for (const r of rels) { + const n = /^rId(\d+)$/.exec(r.id); + 
if (n) max = Math.max(max, Number(n[1])); + } + return max; +} + +function relTypeSuffix(type: string): string { + const slash = type.lastIndexOf("/"); + return slash >= 0 ? type.slice(slash + 1) : type; +} + +function relsPathFor(partPath: string): string { + const dir = dirOf(partPath); + const base = baseOf(partPath); + return dir ? `${dir}/_rels/${base}.rels` : `_rels/${base}.rels`; +} + +function dirOf(p: string): string { + const slash = p.lastIndexOf("/"); + return slash >= 0 ? p.slice(0, slash) : ""; +} + +function baseOf(p: string): string { + const slash = p.lastIndexOf("/"); + return slash >= 0 ? p.slice(slash + 1) : p; +} + +/** Resolve a relationship target relative to its owner directory, collapsing + * `..` / `.` segments. */ +function normalisePath(target: string, baseDir: string): string { + if (target.startsWith("/")) return target.slice(1); + const segs = (baseDir ? baseDir.split("/") : []).concat(target.split("/")); + const out: string[] = []; + for (const s of segs) { + if (s === "" || s === ".") continue; + if (s === "..") out.pop(); + else out.push(s); + } + return out.join("/"); +} + +/** Express `partPath` relative to `fromDir` (a package directory). */ +function relativeTo(partPath: string, fromDir: string): string { + const from = fromDir ? fromDir.split("/") : []; + const to = partPath.split("/"); + let i = 0; + while (i < from.length && i < to.length && from[i] === to[i]) i++; + const up = from.slice(i).map(() => ".."); + return [...up, ...to.slice(i)].join("/"); +} + +/** Allocate an unused part path `dir/N.ext`. */ +function freshPartPath(zip: JSZip, dir: string, stem: string, ext: string): string { + let n = 1; + for (;;) { + const p = `${dir}/${stem}${n}.${ext}`; + if (!zip.file(p)) return p; + n++; + } +} + +/** Pick an unused chart-part path near a synthesiser's suggestion. 
*/ +function freshSynthPath(zip: JSZip, suggested: string): string { + if (!zip.file(suggested)) return suggested; + const dot = suggested.lastIndexOf("."); + const stem = suggested.slice(0, dot); + const ext = suggested.slice(dot + 1); + let n = 2; + for (;;) { + const p = `${stem}_${n}.${ext}`; + if (!zip.file(p)) return p; + n++; + } +} + +async function readText(zip: JSZip, path: string): Promise { + const file = zip.file(path); + return file ? file.async("string") : null; +} + +function colLetter(n: number): string { + // 1 -> A, 26 -> Z, 27 -> AA … + let s = ""; + while (n > 0) { + const r = (n - 1) % 26; + s = String.fromCharCode(65 + r) + s; + n = Math.floor((n - 1) / 26); + } + return s; +} + +/** + * Return the first complete `` or `` element in `xml`. + * Unlike a `[\s\S]*?(?:/>|)` shortcut, this never stops early on a + * self-closing CHILD (e.g. `` inside an ``). The tags it + * is used on (`a:rPr`, `a:pPr`, `a:bodyPr`, `a:lstStyle`) don't self-nest. + */ +function firstElement(xml: string, tag: string): string | undefined { + const re = new RegExp(`<${tag}\\b[^>]*\\/>|<${tag}\\b[^>]*>[\\s\\S]*?<\\/${tag}>`); + return re.exec(xml)?.[0]; +} + +function escapeText(s: string): string { + return s.replace(/&/g, "&").replace(//g, ">"); +} + +function escapeAttr(s: string): string { + return s.replace(/&/g, "&").replace(/`, this is recorded for + * EVERY top-level element and group child with no filtering. + */ +export interface ElementLocation { + /** Source slide part path, e.g. `ppt/slides/slide2.xml`. */ + slidePath: string; + /** Verbatim ``/``/``/``/`` + * block, exactly as it appears in the source slide XML. */ + xml: string; +} + +/** + * Complete element-id → source-location index. The surgical-edit API + * (`applyEdits`) uses it to find and patch an element inside its source + * slide XML by the same stable id `parsePptx` returns. 
Module-global / + * in-memory, populated by the most recent `parsePptx`; the host calls + * `parsePptx(source)` then `applyEdits(source, plan)` in the same process, + * so it is warm. (Mirrors `elementSourceRegistry`'s lifetime.) + */ +const elementLocationRegistry = new Map(); /* NOTE(review): `Map` has no type arguments here, which looks like extraction damage; the accessor below takes a string id and returns ElementLocation or undefined */ + +/** Return the location stored for `elementId`, or undefined when the registry has no entry for that id. */ export function getElementLocation( + elementId: string +): ElementLocation | undefined { + return elementLocationRegistry.get(elementId); +} + +/** Store `{ slidePath, xml: rawXml }` under `element.id`; does nothing when `rawXml` is undefined or empty. Storing under an id that is already present replaces the earlier entry. */ function registerElementLocation( + element: SlideElement, + rawXml: string | undefined, + slidePath: string +): void { + if (!rawXml) return; + elementLocationRegistry.set(element.id, { slidePath, xml: rawXml }); +} + export function snapshotElement(element: SlideElement): string { return JSON.stringify(snapshotFields(element)); } @@ -1036,24 +1075,28 @@ async function parseSpTree( const el = await parseSpOrText(node, ctx, outer); if (el) { registerElementSource(el, rawSrc, ctx.slidePath, ctx.theme); + registerElementLocation(el, rawSrc, ctx.slidePath); out.push(el); } } else if (tag === "p:pic") { const el = await parsePic(node, ctx, outer); if (el) { registerElementSource(el, rawSrc, ctx.slidePath, ctx.theme); + registerElementLocation(el, rawSrc, ctx.slidePath); out.push(el); } } else if (tag === "p:cxnSp") { const el = parseCxn(node, ctx, outer); if (el) { registerElementSource(el, rawSrc, ctx.slidePath, ctx.theme); + registerElementLocation(el, rawSrc, ctx.slidePath); out.push(el); } } else if (tag === "p:graphicFrame") { const el = await parseGraphicFrame(node, ctx, outer); if (el) { registerElementSource(el, rawSrc, ctx.slidePath, ctx.theme); + registerElementLocation(el, rawSrc, ctx.slidePath); out.push(el); } } else if (tag === "p:grpSp") { @@ -1073,6 +1116,7 @@ async function parseSpTree( // descendant is edited the snapshot diverges (see snapshotElement) // and the synth path re-emits the group instead. 
registerElementSource(group, rawSrc, ctx.slidePath, ctx.theme); + registerElementLocation(group, rawSrc, ctx.slidePath); out.push(group); } }