Update the page labels tree when a pdf is extracted (bug 1997379)
This commit is contained in:
parent
85ed401b82
commit
ad97c5b816
@ -735,6 +735,16 @@ class Catalog {
|
||||
return rawDests;
|
||||
}
|
||||
|
||||
get rawPageLabels() {
|
||||
const obj = this.#catDict.getRaw("PageLabels");
|
||||
if (!obj) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const numberTree = new NumberTree(obj, this.xref);
|
||||
return numberTree.getAll();
|
||||
}
|
||||
|
||||
get pageLabels() {
|
||||
let obj = null;
|
||||
try {
|
||||
@ -749,8 +759,8 @@ class Catalog {
|
||||
}
|
||||
|
||||
#readPageLabels() {
|
||||
const obj = this.#catDict.getRaw("PageLabels");
|
||||
if (!obj) {
|
||||
const nums = this.rawPageLabels;
|
||||
if (!nums) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -758,8 +768,6 @@ class Catalog {
|
||||
let style = null,
|
||||
prefix = "";
|
||||
|
||||
const numberTree = new NumberTree(obj, this.xref);
|
||||
const nums = numberTree.getAll();
|
||||
let currentLabel = "",
|
||||
currentIndex = 1;
|
||||
|
||||
|
||||
@ -25,6 +25,7 @@ import { StringStream } from "../stream.js";
|
||||
import { stringToAsciiOrUTF16BE } from "../core_utils.js";
|
||||
|
||||
const MAX_LEAVES_PER_PAGES_NODE = 16;
|
||||
const MAX_IN_NAME_TREE_NODE = 64;
|
||||
|
||||
class PageData {
|
||||
constructor(page, documentData) {
|
||||
@ -39,6 +40,7 @@ class PageData {
|
||||
class DocumentData {
|
||||
constructor(document) {
|
||||
this.document = document;
|
||||
this.pageLabels = null;
|
||||
this.pagesMap = new RefSetCache();
|
||||
this.oldRefMapping = new RefSetCache();
|
||||
}
|
||||
@ -61,6 +63,7 @@ class PDFEditor {
|
||||
this.version = "1.7";
|
||||
this.title = title;
|
||||
this.author = author;
|
||||
this.pageLabels = null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -253,6 +256,8 @@ class PDFEditor {
|
||||
await Promise.all(promises);
|
||||
promises.length = 0;
|
||||
|
||||
this.#collectPageLabels();
|
||||
|
||||
for (const page of this.oldPages) {
|
||||
promises.push(this.#postCollectPageData(page));
|
||||
}
|
||||
@ -270,7 +275,12 @@ class PDFEditor {
|
||||
* @param {DocumentData} documentData
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async #collectDocumentData(documentData) {}
|
||||
async #collectDocumentData(documentData) {
|
||||
const { document } = documentData;
|
||||
await document.pdfManager
|
||||
.ensureCatalog("rawPageLabels")
|
||||
.then(pageLabels => (documentData.pageLabels = pageLabels));
|
||||
}
|
||||
|
||||
/**
|
||||
* Post process the collected page data.
|
||||
@ -306,6 +316,56 @@ class PDFEditor {
|
||||
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
|
||||
}
|
||||
|
||||
async #collectPageLabels() {
|
||||
// We can only preserve page labels when editing a single PDF file.
|
||||
// This is consistent with behavior in Adobe Acrobat.
|
||||
if (!this.hasSingleFile) {
|
||||
return;
|
||||
}
|
||||
const {
|
||||
documentData: { document, pageLabels },
|
||||
} = this.oldPages[0];
|
||||
if (!pageLabels) {
|
||||
return;
|
||||
}
|
||||
const numPages = document.numPages;
|
||||
const oldPageLabels = [];
|
||||
const oldPageIndices = new Set(
|
||||
this.oldPages.map(({ page: { pageIndex } }) => pageIndex)
|
||||
);
|
||||
let currentLabel = null;
|
||||
let stFirstIndex = -1;
|
||||
for (let i = 0; i < numPages; i++) {
|
||||
const newLabel = pageLabels.get(i);
|
||||
if (newLabel) {
|
||||
currentLabel = newLabel;
|
||||
stFirstIndex = currentLabel.has("St") ? i : -1;
|
||||
}
|
||||
if (!oldPageIndices.has(i)) {
|
||||
continue;
|
||||
}
|
||||
if (stFirstIndex !== -1) {
|
||||
const st = currentLabel.get("St");
|
||||
currentLabel = currentLabel.clone();
|
||||
currentLabel.set("St", st + (i - stFirstIndex));
|
||||
stFirstIndex = -1;
|
||||
}
|
||||
oldPageLabels.push(currentLabel);
|
||||
}
|
||||
currentLabel = oldPageLabels[0];
|
||||
let currentIndex = 0;
|
||||
const newPageLabels = (this.pageLabels = [[0, currentLabel]]);
|
||||
for (let i = 0, ii = oldPageLabels.length; i < ii; i++) {
|
||||
const label = oldPageLabels[i];
|
||||
if (label === currentLabel) {
|
||||
continue;
|
||||
}
|
||||
currentIndex = i;
|
||||
currentLabel = label;
|
||||
newPageLabels.push([currentIndex, currentLabel]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a copy of a page.
|
||||
* @param {number} pageIndex
|
||||
@ -423,6 +483,66 @@ class PDFEditor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a name or number tree from the given map.
|
||||
* @param {Array<[string, any]>} map
|
||||
* @returns {Ref}
|
||||
*/
|
||||
#makeNameNumTree(map, areNames) {
|
||||
const allEntries = map.sort(
|
||||
areNames
|
||||
? ([keyA], [keyB]) => keyA.localeCompare(keyB)
|
||||
: ([keyA], [keyB]) => keyA - keyB
|
||||
);
|
||||
const maxLeaves =
|
||||
MAX_IN_NAME_TREE_NODE <= 1 ? allEntries.length : MAX_IN_NAME_TREE_NODE;
|
||||
const [treeRef, treeDict] = this.newDict;
|
||||
const stack = [{ dict: treeDict, entries: allEntries }];
|
||||
const valueType = areNames ? "Names" : "Nums";
|
||||
|
||||
while (stack.length > 0) {
|
||||
const { dict, entries } = stack.pop();
|
||||
if (entries.length <= maxLeaves) {
|
||||
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
|
||||
dict.set(valueType, entries.flat());
|
||||
continue;
|
||||
}
|
||||
const entriesChunks = [];
|
||||
const chunkSize = Math.max(
|
||||
maxLeaves,
|
||||
Math.ceil(entries.length / maxLeaves)
|
||||
);
|
||||
for (let i = 0; i < entries.length; i += chunkSize) {
|
||||
entriesChunks.push(entries.slice(i, i + chunkSize));
|
||||
}
|
||||
const entriesRefs = [];
|
||||
dict.set("Kids", entriesRefs);
|
||||
for (const chunk of entriesChunks) {
|
||||
const [entriesRef, entriesDict] = this.newDict;
|
||||
entriesRefs.push(entriesRef);
|
||||
entriesDict.set("Limits", [chunk[0][0], chunk.at(-1)[0]]);
|
||||
stack.push({ dict: entriesDict, entries: chunk });
|
||||
}
|
||||
}
|
||||
return treeRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the page labels tree if it exists.
|
||||
*/
|
||||
#makePageLabelsTree() {
|
||||
const { pageLabels } = this;
|
||||
if (!pageLabels || pageLabels.length === 0) {
|
||||
return;
|
||||
}
|
||||
const { rootDict } = this;
|
||||
const pageLabelsRef = this.#makeNameNumTree(
|
||||
this.pageLabels,
|
||||
/* areNames = */ false
|
||||
);
|
||||
rootDict.set("PageLabels", pageLabelsRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the root dictionary.
|
||||
* @returns {Promise<void>}
|
||||
@ -432,6 +552,7 @@ class PDFEditor {
|
||||
rootDict.setIfName("Type", "Catalog");
|
||||
rootDict.set("Version", this.version);
|
||||
this.#makePageTree();
|
||||
this.#makePageLabelsTree();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -757,3 +757,4 @@
|
||||
!doc_1_3_pages.pdf
|
||||
!doc_2_3_pages.pdf
|
||||
!doc_3_3_pages.pdf
|
||||
!labelled_pages.pdf
|
||||
|
||||
BIN
test/pdfs/labelled_pages.pdf
Executable file
BIN
test/pdfs/labelled_pages.pdf
Executable file
Binary file not shown.
@ -5542,5 +5542,39 @@ small scripts as well as for`);
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Page labels", function () {
  it("extract page and check labels", async function () {
    // Load the source document and check the labels of its eleven pages.
    const sourceTask = getDocument(
      buildGetDocumentParams("labelled_pages.pdf")
    );
    const pdfDoc = await sourceTask.promise;
    const sourceLabels = await pdfDoc.getPageLabels();
    expect(sourceLabels).toEqual([
      // Pages 0 to 3.
      "i",
      "ii",
      "iii",
      "iv",
      // Pages 4 to 6.
      "1",
      "2",
      "3",
      // Pages 7 and 8.
      "a",
      "b",
      // Pages 9 and 10.
      "4",
      "5",
    ]);

    // Keep only a subset of the pages, then release the source document.
    const data = await pdfDoc.extractPages({
      document: null,
      includePages: [0, 1, 5, 7, 10],
    });
    await sourceTask.destroy();

    // Reload the extracted document and check the labels it carries.
    const extractedTask = getDocument(data);
    const newPdfDoc = await extractedTask.promise;
    const extractedLabels = await newPdfDoc.getPageLabels();
    expect(extractedLabels).toEqual(["i", "ii", "1", "a", "5"]);
    await extractedTask.destroy();
  });
});
|
||||
});
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user