Update the named page destinations when some PDFs are combined (bug 1997379)

and remove link annotations pointing to a deleted page.
This commit is contained in:
Calixte Denizet 2025-11-07 18:21:51 +01:00
parent 57334bd205
commit 37f4712f7e
6 changed files with 415 additions and 15 deletions

View File

@ -32,6 +32,8 @@ class PageData {
this.page = page; this.page = page;
this.documentData = documentData; this.documentData = documentData;
this.annotations = null; this.annotations = null;
// Named destinations which point to this page.
this.pointingNamedDestinations = null;
documentData.pagesMap.put(page.ref, this); documentData.pagesMap.put(page.ref, this);
} }
@ -40,9 +42,13 @@ class PageData {
class DocumentData { class DocumentData {
constructor(document) { constructor(document) {
this.document = document; this.document = document;
this.destinations = null;
this.pageLabels = null; this.pageLabels = null;
this.pagesMap = new RefSetCache(); this.pagesMap = new RefSetCache();
this.oldRefMapping = new RefSetCache(); this.oldRefMapping = new RefSetCache();
this.dedupNamedDestinations = new Map();
this.usedNamedDestinations = new Set();
this.postponedRefCopies = new RefSetCache();
} }
} }
@ -64,6 +70,7 @@ class PDFEditor {
this.title = title; this.title = title;
this.author = author; this.author = author;
this.pageLabels = null; this.pageLabels = null;
this.namedDestinations = new Map();
} }
/** /**
@ -114,15 +121,21 @@ class PDFEditor {
if (newRef) { if (newRef) {
return newRef; return newRef;
} }
const oldRef = obj;
obj = await xref.fetchAsync(oldRef);
if (typeof obj === "number") {
// Simple value; no need to create a new reference.
return obj;
}
newRef = this.newRef; newRef = this.newRef;
oldRefMapping.put(obj, newRef); oldRefMapping.put(oldRef, newRef);
obj = await xref.fetchAsync(obj);
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
if ( if (
obj instanceof Dict && obj instanceof Dict &&
isName(obj.get("Type"), "Page") && isName(obj.get("Type"), "Page") &&
!this.currentDocument.pagesMap.has(obj) !this.currentDocument.pagesMap.has(oldRef)
) { ) {
throw new Error( throw new Error(
"Add a deleted page to the document is not supported." "Add a deleted page to the document is not supported."
@ -134,11 +147,20 @@ class PDFEditor {
return newRef; return newRef;
} }
const promises = []; const promises = [];
const {
currentDocument: { postponedRefCopies },
} = this;
if (Array.isArray(obj)) { if (Array.isArray(obj)) {
if (mustClone) { if (mustClone) {
obj = obj.slice(); obj = obj.slice();
} }
for (let i = 0, ii = obj.length; i < ii; i++) { for (let i = 0, ii = obj.length; i < ii; i++) {
const postponedActions = postponedRefCopies.get(obj[i]);
if (postponedActions) {
// The object is a reference that needs to be copied later.
postponedActions.push(ref => (obj[i] = ref));
continue;
}
promises.push( promises.push(
this.#collectDependencies(obj[i], true, xref).then( this.#collectDependencies(obj[i], true, xref).then(
newObj => (obj[i] = newObj) newObj => (obj[i] = newObj)
@ -159,6 +181,12 @@ class PDFEditor {
} }
if (dict) { if (dict) {
for (const [key, rawObj] of dict.getRawEntries()) { for (const [key, rawObj] of dict.getRawEntries()) {
const postponedActions = postponedRefCopies.get(rawObj);
if (postponedActions) {
// The object is a reference that needs to be copied later.
postponedActions.push(ref => dict.set(key, ref));
continue;
}
promises.push( promises.push(
this.#collectDependencies(rawObj, true, xref).then(newObj => this.#collectDependencies(rawObj, true, xref).then(newObj =>
dict.set(key, newObj) dict.set(key, newObj)
@ -189,11 +217,13 @@ class PDFEditor {
const promises = []; const promises = [];
let newIndex = 0; let newIndex = 0;
this.hasSingleFile = pageInfos.length === 1; this.hasSingleFile = pageInfos.length === 1;
const allDocumentData = [];
for (const { document, includePages, excludePages } of pageInfos) { for (const { document, includePages, excludePages } of pageInfos) {
if (!document) { if (!document) {
continue; continue;
} }
const documentData = new DocumentData(document); const documentData = new DocumentData(document);
allDocumentData.push(documentData);
promises.push(this.#collectDocumentData(documentData)); promises.push(this.#collectDocumentData(documentData));
let keptIndices, keptRanges, deletedIndices, deletedRanges; let keptIndices, keptRanges, deletedIndices, deletedRanges;
for (const page of includePages || []) { for (const page of includePages || []) {
@ -256,6 +286,7 @@ class PDFEditor {
await Promise.all(promises); await Promise.all(promises);
promises.length = 0; promises.length = 0;
this.#collectValidDestinations(allDocumentData);
this.#collectPageLabels(); this.#collectPageLabels();
for (const page of this.oldPages) { for (const page of this.oldPages) {
@ -263,10 +294,15 @@ class PDFEditor {
} }
await Promise.all(promises); await Promise.all(promises);
this.#findDuplicateNamedDestinations();
this.#setPostponedRefCopies(allDocumentData);
for (let i = 0, ii = this.oldPages.length; i < ii; i++) { for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
this.newPages[i] = await this.#makePageCopy(i, null); this.newPages[i] = await this.#makePageCopy(i, null);
} }
this.#fixPostponedRefCopies(allDocumentData);
return this.writePDF(); return this.writePDF();
} }
@ -276,10 +312,17 @@ class PDFEditor {
* @return {Promise<void>} * @return {Promise<void>}
*/ */
async #collectDocumentData(documentData) { async #collectDocumentData(documentData) {
const { document } = documentData; const {
await document.pdfManager document: { pdfManager },
} = documentData;
await Promise.all([
pdfManager
.ensureCatalog("destinations")
.then(destinations => (documentData.destinations = destinations)),
pdfManager
.ensureCatalog("rawPageLabels") .ensureCatalog("rawPageLabels")
.then(pageLabels => (documentData.pageLabels = pageLabels)); .then(pageLabels => (documentData.pageLabels = pageLabels)),
]);
} }
/** /**
@ -290,6 +333,7 @@ class PDFEditor {
async #postCollectPageData(pageData) { async #postCollectPageData(pageData) {
const { const {
page: { xref, annotations }, page: { xref, annotations },
documentData: { pagesMap, destinations, usedNamedDestinations },
} = pageData; } = pageData;
if (!annotations) { if (!annotations) {
@ -300,22 +344,185 @@ class PDFEditor {
let newAnnotations = []; let newAnnotations = [];
let newIndex = 0; let newIndex = 0;
// TODO: remove only links to deleted pages. // Filter out annotations that are linking to deleted pages.
for (const annotationRef of annotations) { for (const annotationRef of annotations) {
const newAnnotationIndex = newIndex++; const newAnnotationIndex = newIndex++;
promises.push( promises.push(
xref.fetchIfRefAsync(annotationRef).then(async annotationDict => { xref.fetchIfRefAsync(annotationRef).then(async annotationDict => {
if (!isName(annotationDict.get("Subtype"), "Link")) { if (!isName(annotationDict.get("Subtype"), "Link")) {
newAnnotations[newAnnotationIndex] = annotationRef; newAnnotations[newAnnotationIndex] = annotationRef;
return;
}
const action = annotationDict.get("A");
const dest =
action instanceof Dict
? action.get("D")
: annotationDict.get("Dest");
if (
!dest /* not a destination */ ||
(Array.isArray(dest) &&
(!(dest[0] instanceof Ref) || pagesMap.has(dest[0])))
) {
// Keep the annotation as is: it isn't linking to a deleted page.
newAnnotations[newAnnotationIndex] = annotationRef;
} else if (typeof dest === "string") {
const destString = stringToPDFString(
dest,
/* keepEscapeSequence = */ true
);
if (destinations.has(destString)) {
// Keep the annotation as is: the named destination is valid.
// Valid named destinations have been collected previously (see
// #collectValidDestinations).
newAnnotations[newAnnotationIndex] = annotationRef;
usedNamedDestinations.add(destString);
}
} }
}) })
); );
} }
await Promise.all(promises); await Promise.all(promises);
newAnnotations = newAnnotations.filter(annot => !!annot); newAnnotations = newAnnotations.filter(annot => !!annot);
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null; pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
} }
/**
* Some references cannot be copied right away since they correspond to some
* pages that haven't been processed yet. Postpone the copy of those
* references.
* @param {Array<DocumentData>} allDocumentData
*/
#setPostponedRefCopies(allDocumentData) {
for (const { postponedRefCopies, pagesMap } of allDocumentData) {
for (const oldPageRef of pagesMap.keys()) {
postponedRefCopies.put(oldPageRef, []);
}
}
}
/**
* Fix all postponed reference copies.
* @param {Array<DocumentData>} allDocumentData
*/
#fixPostponedRefCopies(allDocumentData) {
for (const { postponedRefCopies, oldRefMapping } of allDocumentData) {
for (const [oldRef, actions] of postponedRefCopies.items()) {
const newRef = oldRefMapping.get(oldRef);
for (const action of actions) {
action(newRef);
}
}
postponedRefCopies.clear();
}
}
/**
* Collect named destinations that are still valid (i.e. pointing to kept
* pages).
* @param {Array<DocumentData>} allDocumentData
*/
#collectValidDestinations(allDocumentData) {
// TODO: Handle OpenAction as well.
for (const documentData of allDocumentData) {
if (!documentData.destinations) {
continue;
}
const { destinations, pagesMap } = documentData;
const newDestinations = (documentData.destinations = new Map());
for (const [key, dest] of Object.entries(destinations)) {
const pageRef = dest[0];
const pageData = pagesMap.get(pageRef);
if (!pageData) {
continue;
}
(pageData.pointingNamedDestinations ||= new Set()).add(key);
newDestinations.set(key, dest);
}
}
}
/**
* Find and rename duplicate named destinations.
*/
#findDuplicateNamedDestinations() {
const { namedDestinations } = this;
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
const page = this.oldPages[i];
const {
documentData: {
destinations,
dedupNamedDestinations,
usedNamedDestinations,
},
} = page;
let { pointingNamedDestinations } = page;
if (!pointingNamedDestinations) {
// No named destinations pointing to this page.
continue;
}
// Keep only the named destinations that are still used.
page.pointingNamedDestinations = pointingNamedDestinations =
pointingNamedDestinations.intersection(usedNamedDestinations);
for (const pointingDest of pointingNamedDestinations) {
if (!usedNamedDestinations.has(pointingDest)) {
// If the named destination isn't used, we can keep it as is.
continue;
}
const dest = destinations.get(pointingDest).slice();
if (!namedDestinations.has(pointingDest)) {
// If the named destination hasn't been used yet, we can keep it
// as is.
namedDestinations.set(pointingDest, dest);
continue;
}
// Create a new unique named destination.
const newName = `${pointingDest}_p${i + 1}`;
dedupNamedDestinations.set(pointingDest, newName);
namedDestinations.set(newName, dest);
}
}
}
/**
* Fix named destinations in the annotations.
* @param {Array<Ref>} annotations
* @param {Map<string,string>} dedupNamedDestinations
*/
#fixNamedDestinations(annotations, dedupNamedDestinations) {
if (dedupNamedDestinations.size === 0) {
return;
}
const fixDestination = (dict, key, dest) => {
if (typeof dest === "string") {
dict.set(
key,
dedupNamedDestinations.get(
stringToPDFString(dest, /* keepEscapeSequence = */ true)
) || dest
);
}
};
for (const annotRef of annotations) {
const annotDict = this.xref[annotRef.num];
if (!isName(annotDict.get("Subtype"), "Link")) {
continue;
}
const action = annotDict.get("A");
if (action instanceof Dict && action.has("D")) {
const dest = action.get("D");
fixDestination(action, "D", dest);
continue;
}
const dest = annotDict.get("Dest");
fixDestination(annotDict, "Dest", dest);
}
}
async #collectPageLabels() { async #collectPageLabels() {
// We can only preserve page labels when editing a single PDF file. // We can only preserve page labels when editing a single PDF file.
// This is consistent with behavior in Adobe Acrobat. // This is consistent with behavior in Adobe Acrobat.
@ -372,14 +579,23 @@ class PDFEditor {
* @returns {Promise<Ref>} the page reference in the new PDF document. * @returns {Promise<Ref>} the page reference in the new PDF document.
*/ */
async #makePageCopy(pageIndex) { async #makePageCopy(pageIndex) {
const { page, documentData, annotations } = this.oldPages[pageIndex]; const { page, documentData, annotations, pointingNamedDestinations } =
this.oldPages[pageIndex];
this.currentDocument = documentData; this.currentDocument = documentData;
const { oldRefMapping } = documentData; const { dedupNamedDestinations, oldRefMapping } = documentData;
const { xref, rotate, mediaBox, resources, ref: oldPageRef } = page; const { xref, rotate, mediaBox, resources, ref: oldPageRef } = page;
const pageRef = this.newRef; const pageRef = this.newRef;
const pageDict = (this.xref[pageRef.num] = page.pageDict.clone()); const pageDict = (this.xref[pageRef.num] = page.pageDict.clone());
oldRefMapping.put(oldPageRef, pageRef); oldRefMapping.put(oldPageRef, pageRef);
if (pointingNamedDestinations) {
for (const pointingDest of pointingNamedDestinations) {
const name = dedupNamedDestinations.get(pointingDest) || pointingDest;
const dest = this.namedDestinations.get(name);
dest[0] = pageRef;
}
}
// No need to keep these entries as we'll set them again later. // No need to keep these entries as we'll set them again later.
for (const key of [ for (const key of [
"Rotate", "Rotate",
@ -416,10 +632,16 @@ class PDFEditor {
"Resources", "Resources",
await this.#collectDependencies(resources, true, xref) await this.#collectDependencies(resources, true, xref)
); );
pageDict.setIfArray(
"Annots", if (annotations) {
await this.#collectDependencies(annotations, true, xref) const newAnnotations = await this.#collectDependencies(
annotations,
true,
xref
); );
this.#fixNamedDestinations(newAnnotations, dedupNamedDestinations);
pageDict.setIfArray("Annots", newAnnotations);
}
if (this.useObjectStreams) { if (this.useObjectStreams) {
const newLastRef = this.newRefCount; const newLastRef = this.newRefCount;
@ -485,7 +707,7 @@ class PDFEditor {
/** /**
* Create a name or number tree from the given map. * Create a name or number tree from the given map.
* @param {Array<[string, any]>} map * @param {Array<[string|number, any]>} map
* @returns {Ref} * @returns {Ref}
*/ */
#makeNameNumTree(map, areNames) { #makeNameNumTree(map, areNames) {
@ -543,6 +765,24 @@ class PDFEditor {
rootDict.set("PageLabels", pageLabelsRef); rootDict.set("PageLabels", pageLabelsRef);
} }
#makeDestinationsTree() {
const { namedDestinations } = this;
if (namedDestinations.size === 0) {
return;
}
if (!this.namesDict) {
[this.namesRef, this.namesDict] = this.newDict;
this.rootDict.set("Names", this.namesRef);
}
this.namesDict.set(
"Dests",
this.#makeNameNumTree(
Array.from(namedDestinations.entries()),
/* areNames = */ true
)
);
}
/** /**
* Create the root dictionary. * Create the root dictionary.
* @returns {Promise<void>} * @returns {Promise<void>}
@ -553,6 +793,7 @@ class PDFEditor {
rootDict.set("Version", this.version); rootDict.set("Version", this.version);
this.#makePageTree(); this.#makePageTree();
this.#makePageLabelsTree(); this.#makePageLabelsTree();
this.#makeDestinationsTree();
} }
/** /**

View File

@ -439,6 +439,12 @@ class RefSetCache {
yield [Ref.fromString(ref), value]; yield [Ref.fromString(ref), value];
} }
} }
*keys() {
for (const ref of this._map.keys()) {
yield Ref.fromString(ref);
}
}
} }
function isName(v, name) { function isName(v, name) {

View File

@ -758,3 +758,4 @@
!doc_2_3_pages.pdf !doc_2_3_pages.pdf
!doc_3_3_pages.pdf !doc_3_3_pages.pdf
!labelled_pages.pdf !labelled_pages.pdf
!extract_link.pdf

BIN
test/pdfs/extract_link.pdf Executable file

Binary file not shown.

View File

@ -5337,6 +5337,15 @@ small scripts as well as for`);
}); });
describe("PDF page editing", function () { describe("PDF page editing", function () {
// Collect the `ref` of every page of `pdfDoc`, in page order. The pages are
// requested one at a time, with page numbers starting at 1.
const getPageRefs = async pdfDoc => {
  const pageRefs = [];
  let pageNumber = 1;
  while (pageNumber <= pdfDoc.numPages) {
    const { ref } = await pdfDoc.getPage(pageNumber);
    pageRefs.push(ref);
    pageNumber += 1;
  }
  return pageRefs;
};
describe("Merge pdfs", function () { describe("Merge pdfs", function () {
it("should merge three PDFs", async function () { it("should merge three PDFs", async function () {
const loadingTask = getDocument( const loadingTask = getDocument(
@ -5576,5 +5585,142 @@ small scripts as well as for`);
await loadingTask.destroy(); await loadingTask.destroy();
}); });
}); });
describe("Named destinations", function () {
// Duplicates the pages of issue6204.pdf twice in a row and checks, after each
// round, that the named destinations are deduplicated with a `_p<page number>`
// suffix and that the link annotation of every page uses the matching name.
it("extract page and check destinations", async function () {
let loadingTask = getDocument(buildGetDocumentParams("issue6204.pdf"));
let pdfDoc = await loadingTask.promise;
let pagesRef = await getPageRefs(pdfDoc);
let destinations = await pdfDoc.getDestinations();
// Initial state: one named destination per page.
expect(destinations).toEqual({
"Page.1": [pagesRef[0], { name: "XYZ" }, 0, 375, null],
"Page.2": [pagesRef[1], { name: "XYZ" }, 0, 375, null],
});
// First round: two entries for the same document, 4 pages are expected below.
// NOTE(review): presumably `document: null` stands for the current document
// - confirm against the `extractPages` API.
let data = await pdfDoc.extractPages([
{ document: null },
{ document: null },
]);
await loadingTask.destroy();
loadingTask = getDocument(data);
pdfDoc = await loadingTask.promise;
expect(pdfDoc.numPages).toEqual(4);
pagesRef = await getPageRefs(pdfDoc);
destinations = await pdfDoc.getDestinations();
// The names coming with pages 3 and 4 clash with the ones of pages 1 and 2,
// hence they get a suffix built from their page number.
expect(destinations).toEqual({
"Page.1": [pagesRef[0], { name: "XYZ" }, 0, 375, null],
"Page.2": [pagesRef[1], { name: "XYZ" }, 0, 375, null],
"Page.1_p3": [pagesRef[2], { name: "XYZ" }, 0, 375, null],
"Page.2_p4": [pagesRef[3], { name: "XYZ" }, 0, 375, null],
});
// Expected destination of the single link annotation on each page, in page
// order: every link must use the (possibly renamed) destination of its copy.
const expectedDests = ["Page.2", "Page.1", "Page.2_p4", "Page.1_p3"];
for (let i = 1; i <= 4; i++) {
const pdfPage = await pdfDoc.getPage(i);
const annots = await pdfPage.getAnnotations();
expect(annots.length).toEqual(1);
expect(annots[0].dest).toEqual(expectedDests[i - 1]);
}
// Second round: duplicate the 4-page result, 8 pages are expected below.
data = await pdfDoc.extractPages([
{ document: null },
{ document: null },
]);
await loadingTask.destroy();
loadingTask = getDocument(data);
pdfDoc = await loadingTask.promise;
expect(pdfDoc.numPages).toEqual(8);
pagesRef = await getPageRefs(pdfDoc);
destinations = await pdfDoc.getDestinations();
// Names which already have a suffix get a second one when they clash again.
expect(destinations).toEqual({
"Page.1": [pagesRef[0], { name: "XYZ" }, 0, 375, null],
"Page.2": [pagesRef[1], { name: "XYZ" }, 0, 375, null],
"Page.1_p3": [pagesRef[2], { name: "XYZ" }, 0, 375, null],
"Page.2_p4": [pagesRef[3], { name: "XYZ" }, 0, 375, null],
"Page.1_p5": [pagesRef[4], { name: "XYZ" }, 0, 375, null],
"Page.2_p6": [pagesRef[5], { name: "XYZ" }, 0, 375, null],
"Page.1_p3_p7": [pagesRef[6], { name: "XYZ" }, 0, 375, null],
"Page.2_p4_p8": [pagesRef[7], { name: "XYZ" }, 0, 375, null],
});
// Extend the expectations for the links of pages 5 to 8.
expectedDests.push(
"Page.2_p6",
"Page.1_p5",
"Page.2_p4_p8",
"Page.1_p3_p7"
);
for (let i = 1; i <= 8; i++) {
const pdfPage = await pdfDoc.getPage(i);
const annots = await pdfPage.getAnnotations();
expect(annots.length).toEqual(1);
expect(annots[0].dest).toEqual(expectedDests[i - 1]);
}
await loadingTask.destroy();
});
// Checks that a named destination pointing to a removed page is dropped,
// together with the link annotation which was using it.
it("extract pages and check deleted destinations", async function () {
  let loadingTask = getDocument(buildGetDocumentParams("issue6204.pdf"));
  let pdfDoc = await loadingTask.promise;
  // The second entry excludes the page at index 0, hence 3 pages are expected
  // in the result.
  const data = await pdfDoc.extractPages([
    { document: null },
    { document: null, excludePages: [0] },
  ]);
  await loadingTask.destroy();
  loadingTask = getDocument(data);
  pdfDoc = await loadingTask.promise;

  expect(pdfDoc.numPages).toEqual(3);

  // Only the destinations of the first two pages must remain: no renamed
  // destination is expected for the third page.
  const pagesRef = await getPageRefs(pdfDoc);
  const destinations = await pdfDoc.getDestinations();
  expect(destinations).toEqual({
    "Page.1": [pagesRef[0], { name: "XYZ" }, 0, 375, null],
    "Page.2": [pagesRef[1], { name: "XYZ" }, 0, 375, null],
  });

  // The link of the last page must have been removed.
  const pdfPage = await pdfDoc.getPage(3);
  const annots = await pdfPage.getAnnotations();
  expect(annots.length).toEqual(0);

  // Release the document loaded from the extracted data, as it's done in the
  // sibling tests.
  await loadingTask.destroy();
});
});
describe("Destinations with a page reference", function () {
// Duplicates the pages of extract_link.pdf and checks that a link whose
// destination is an explicit page reference points, in each copy, to the page
// following the one holding the link.
it("extract page and check destinations", async function () {
let loadingTask = getDocument(
buildGetDocumentParams("extract_link.pdf")
);
let pdfDoc = await loadingTask.promise;
let pagesRef = await getPageRefs(pdfDoc);
let pdfPage = await pdfDoc.getPage(1);
let annotations = await pdfPage.getAnnotations();
// Initial state: the only annotation of page 1 targets the second page.
expect(annotations.length).toEqual(1);
expect(annotations[0].dest[0]).toEqual(pagesRef[1]);
// Two entries for the same document, 4 pages are expected below.
const data = await pdfDoc.extractPages([
{ document: null },
{ document: null },
]);
await loadingTask.destroy();
loadingTask = getDocument(data);
pdfDoc = await loadingTask.promise;
expect(pdfDoc.numPages).toEqual(4);
pagesRef = await getPageRefs(pdfDoc);
// Pages 1 and 3 hold the link: `pagesRef` is 0-based, so `pagesRef[i]` is
// the reference of the page right after page number `i`.
for (let i = 1; i <= 4; i += 2) {
pdfPage = await pdfDoc.getPage(i);
annotations = await pdfPage.getAnnotations();
expect(annotations.length).toEqual(1);
expect(annotations[0].dest[0]).toEqual(pagesRef[i]);
}
await loadingTask.destroy();
});
});
}); });
}); });

View File

@ -562,6 +562,12 @@ describe("primitives", function () {
[ref2, obj2], [ref2, obj2],
]); ]);
}); });
it("should support iteration over keys", function () {
  // Two entries are stored, then `keys()` must yield their references in the
  // same order.
  cache.put(ref1, obj1);
  cache.put(ref2, obj2);

  const keys = Array.from(cache.keys());
  expect(keys).toEqual([ref1, ref2]);
});
}); });
describe("isName", function () { describe("isName", function () {