Merge pull request #20411 from calixteman/split_merge_p2
Update the page labels tree when a pdf is extracted (bug 1997379)
This commit is contained in:
commit
57334bd205
@ -735,6 +735,16 @@ class Catalog {
|
|||||||
return rawDests;
|
return rawDests;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get rawPageLabels() {
|
||||||
|
const obj = this.#catDict.getRaw("PageLabels");
|
||||||
|
if (!obj) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const numberTree = new NumberTree(obj, this.xref);
|
||||||
|
return numberTree.getAll();
|
||||||
|
}
|
||||||
|
|
||||||
get pageLabels() {
|
get pageLabels() {
|
||||||
let obj = null;
|
let obj = null;
|
||||||
try {
|
try {
|
||||||
@ -749,8 +759,8 @@ class Catalog {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#readPageLabels() {
|
#readPageLabels() {
|
||||||
const obj = this.#catDict.getRaw("PageLabels");
|
const nums = this.rawPageLabels;
|
||||||
if (!obj) {
|
if (!nums) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -758,8 +768,6 @@ class Catalog {
|
|||||||
let style = null,
|
let style = null,
|
||||||
prefix = "";
|
prefix = "";
|
||||||
|
|
||||||
const numberTree = new NumberTree(obj, this.xref);
|
|
||||||
const nums = numberTree.getAll();
|
|
||||||
let currentLabel = "",
|
let currentLabel = "",
|
||||||
currentIndex = 1;
|
currentIndex = 1;
|
||||||
|
|
||||||
|
|||||||
@ -25,6 +25,7 @@ import { StringStream } from "../stream.js";
|
|||||||
import { stringToAsciiOrUTF16BE } from "../core_utils.js";
|
import { stringToAsciiOrUTF16BE } from "../core_utils.js";
|
||||||
|
|
||||||
const MAX_LEAVES_PER_PAGES_NODE = 16;
|
const MAX_LEAVES_PER_PAGES_NODE = 16;
|
||||||
|
const MAX_IN_NAME_TREE_NODE = 64;
|
||||||
|
|
||||||
class PageData {
|
class PageData {
|
||||||
constructor(page, documentData) {
|
constructor(page, documentData) {
|
||||||
@ -39,6 +40,7 @@ class PageData {
|
|||||||
class DocumentData {
|
class DocumentData {
|
||||||
constructor(document) {
|
constructor(document) {
|
||||||
this.document = document;
|
this.document = document;
|
||||||
|
this.pageLabels = null;
|
||||||
this.pagesMap = new RefSetCache();
|
this.pagesMap = new RefSetCache();
|
||||||
this.oldRefMapping = new RefSetCache();
|
this.oldRefMapping = new RefSetCache();
|
||||||
}
|
}
|
||||||
@ -61,6 +63,7 @@ class PDFEditor {
|
|||||||
this.version = "1.7";
|
this.version = "1.7";
|
||||||
this.title = title;
|
this.title = title;
|
||||||
this.author = author;
|
this.author = author;
|
||||||
|
this.pageLabels = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -253,6 +256,8 @@ class PDFEditor {
|
|||||||
await Promise.all(promises);
|
await Promise.all(promises);
|
||||||
promises.length = 0;
|
promises.length = 0;
|
||||||
|
|
||||||
|
this.#collectPageLabels();
|
||||||
|
|
||||||
for (const page of this.oldPages) {
|
for (const page of this.oldPages) {
|
||||||
promises.push(this.#postCollectPageData(page));
|
promises.push(this.#postCollectPageData(page));
|
||||||
}
|
}
|
||||||
@ -270,7 +275,12 @@ class PDFEditor {
|
|||||||
* @param {DocumentData} documentData
|
* @param {DocumentData} documentData
|
||||||
* @return {Promise<void>}
|
* @return {Promise<void>}
|
||||||
*/
|
*/
|
||||||
async #collectDocumentData(documentData) {}
|
async #collectDocumentData(documentData) {
|
||||||
|
const { document } = documentData;
|
||||||
|
await document.pdfManager
|
||||||
|
.ensureCatalog("rawPageLabels")
|
||||||
|
.then(pageLabels => (documentData.pageLabels = pageLabels));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Post process the collected page data.
|
* Post process the collected page data.
|
||||||
@ -306,6 +316,56 @@ class PDFEditor {
|
|||||||
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
|
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async #collectPageLabels() {
|
||||||
|
// We can only preserve page labels when editing a single PDF file.
|
||||||
|
// This is consistent with behavior in Adobe Acrobat.
|
||||||
|
if (!this.hasSingleFile) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const {
|
||||||
|
documentData: { document, pageLabels },
|
||||||
|
} = this.oldPages[0];
|
||||||
|
if (!pageLabels) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const numPages = document.numPages;
|
||||||
|
const oldPageLabels = [];
|
||||||
|
const oldPageIndices = new Set(
|
||||||
|
this.oldPages.map(({ page: { pageIndex } }) => pageIndex)
|
||||||
|
);
|
||||||
|
let currentLabel = null;
|
||||||
|
let stFirstIndex = -1;
|
||||||
|
for (let i = 0; i < numPages; i++) {
|
||||||
|
const newLabel = pageLabels.get(i);
|
||||||
|
if (newLabel) {
|
||||||
|
currentLabel = newLabel;
|
||||||
|
stFirstIndex = currentLabel.has("St") ? i : -1;
|
||||||
|
}
|
||||||
|
if (!oldPageIndices.has(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (stFirstIndex !== -1) {
|
||||||
|
const st = currentLabel.get("St");
|
||||||
|
currentLabel = currentLabel.clone();
|
||||||
|
currentLabel.set("St", st + (i - stFirstIndex));
|
||||||
|
stFirstIndex = -1;
|
||||||
|
}
|
||||||
|
oldPageLabels.push(currentLabel);
|
||||||
|
}
|
||||||
|
currentLabel = oldPageLabels[0];
|
||||||
|
let currentIndex = 0;
|
||||||
|
const newPageLabels = (this.pageLabels = [[0, currentLabel]]);
|
||||||
|
for (let i = 0, ii = oldPageLabels.length; i < ii; i++) {
|
||||||
|
const label = oldPageLabels[i];
|
||||||
|
if (label === currentLabel) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
currentIndex = i;
|
||||||
|
currentLabel = label;
|
||||||
|
newPageLabels.push([currentIndex, currentLabel]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a copy of a page.
|
* Create a copy of a page.
|
||||||
* @param {number} pageIndex
|
* @param {number} pageIndex
|
||||||
@ -423,6 +483,66 @@ class PDFEditor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a name or number tree from the given map.
|
||||||
|
* @param {Array<[string, any]>} map
|
||||||
|
* @returns {Ref}
|
||||||
|
*/
|
||||||
|
#makeNameNumTree(map, areNames) {
|
||||||
|
const allEntries = map.sort(
|
||||||
|
areNames
|
||||||
|
? ([keyA], [keyB]) => keyA.localeCompare(keyB)
|
||||||
|
: ([keyA], [keyB]) => keyA - keyB
|
||||||
|
);
|
||||||
|
const maxLeaves =
|
||||||
|
MAX_IN_NAME_TREE_NODE <= 1 ? allEntries.length : MAX_IN_NAME_TREE_NODE;
|
||||||
|
const [treeRef, treeDict] = this.newDict;
|
||||||
|
const stack = [{ dict: treeDict, entries: allEntries }];
|
||||||
|
const valueType = areNames ? "Names" : "Nums";
|
||||||
|
|
||||||
|
while (stack.length > 0) {
|
||||||
|
const { dict, entries } = stack.pop();
|
||||||
|
if (entries.length <= maxLeaves) {
|
||||||
|
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
|
||||||
|
dict.set(valueType, entries.flat());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const entriesChunks = [];
|
||||||
|
const chunkSize = Math.max(
|
||||||
|
maxLeaves,
|
||||||
|
Math.ceil(entries.length / maxLeaves)
|
||||||
|
);
|
||||||
|
for (let i = 0; i < entries.length; i += chunkSize) {
|
||||||
|
entriesChunks.push(entries.slice(i, i + chunkSize));
|
||||||
|
}
|
||||||
|
const entriesRefs = [];
|
||||||
|
dict.set("Kids", entriesRefs);
|
||||||
|
for (const chunk of entriesChunks) {
|
||||||
|
const [entriesRef, entriesDict] = this.newDict;
|
||||||
|
entriesRefs.push(entriesRef);
|
||||||
|
entriesDict.set("Limits", [chunk[0][0], chunk.at(-1)[0]]);
|
||||||
|
stack.push({ dict: entriesDict, entries: chunk });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return treeRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the page labels tree if it exists.
|
||||||
|
*/
|
||||||
|
#makePageLabelsTree() {
|
||||||
|
const { pageLabels } = this;
|
||||||
|
if (!pageLabels || pageLabels.length === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const { rootDict } = this;
|
||||||
|
const pageLabelsRef = this.#makeNameNumTree(
|
||||||
|
this.pageLabels,
|
||||||
|
/* areNames = */ false
|
||||||
|
);
|
||||||
|
rootDict.set("PageLabels", pageLabelsRef);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the root dictionary.
|
* Create the root dictionary.
|
||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>}
|
||||||
@ -432,6 +552,7 @@ class PDFEditor {
|
|||||||
rootDict.setIfName("Type", "Catalog");
|
rootDict.setIfName("Type", "Catalog");
|
||||||
rootDict.set("Version", this.version);
|
rootDict.set("Version", this.version);
|
||||||
this.#makePageTree();
|
this.#makePageTree();
|
||||||
|
this.#makePageLabelsTree();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -757,3 +757,4 @@
|
|||||||
!doc_1_3_pages.pdf
|
!doc_1_3_pages.pdf
|
||||||
!doc_2_3_pages.pdf
|
!doc_2_3_pages.pdf
|
||||||
!doc_3_3_pages.pdf
|
!doc_3_3_pages.pdf
|
||||||
|
!labelled_pages.pdf
|
||||||
|
|||||||
BIN
test/pdfs/labelled_pages.pdf
Executable file
BIN
test/pdfs/labelled_pages.pdf
Executable file
Binary file not shown.
@ -5542,5 +5542,39 @@ small scripts as well as for`);
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("Page labels", function () {
  it("extract page and check labels", async function () {
    // Check the labels of the original document first.
    const sourceTask = getDocument(
      buildGetDocumentParams("labelled_pages.pdf")
    );
    const sourceDoc = await sourceTask.promise;
    const sourceLabels = await sourceDoc.getPageLabels();
    expect(sourceLabels).toEqual([
      "i" /* Page 0 */,
      "ii" /* Page 1 */,
      "iii" /* Page 2 */,
      "iv" /* Page 3 */,
      "1" /* Page 4 */,
      "2" /* Page 5 */,
      "3" /* Page 6 */,
      "a" /* Page 7 */,
      "b" /* Page 8 */,
      "4" /* Page 9 */,
      "5" /* Page 10 */,
    ]);

    // Keep only some of the pages, then reload the extracted document and
    // check the labels it ends up with.
    const extractedData = await sourceDoc.extractPages({
      document: null,
      includePages: [0, 1, 5, 7, 10],
    });
    await sourceTask.destroy();

    const extractedTask = getDocument(extractedData);
    const extractedDoc = await extractedTask.promise;
    const extractedLabels = await extractedDoc.getPageLabels();
    expect(extractedLabels).toEqual(["i", "ii", "1", "a", "5"]);
    await extractedTask.destroy();
  });
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user