Merge pull request #20409 from calixteman/split_merge_p1
Add the possibility to create a PDF from different ones (bug 1997379)

Commit 85ed401b82
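For orientation, a minimal sketch of how the new API can be driven from the main thread, based on the unit tests included in this commit (the file names are placeholders):

  // Merge the already-loaded document with a second PDF, keeping only
  // page 0 of the second one; `null` stands for "this document".
  const pdfDoc = await getDocument("first.pdf").promise;
  const bytes = new Uint8Array(await (await fetch("second.pdf")).arrayBuffer());
  const merged = await pdfDoc.extractPages([
    { document: null },
    { document: bytes, includePages: [0] },
  ]);
  // `merged` is a Uint8Array containing a complete, standalone PDF.
  const mergedDoc = await getDocument(merged).promise;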
@@ -131,6 +131,19 @@ class DecodeStream extends BaseStream {
   getBaseStreams() {
     return this.stream ? this.stream.getBaseStreams() : null;
   }

+  clone() {
+    // Make sure it has been fully read.
+    while (!this.eof) {
+      this.readBlock();
+    }
+    return new Stream(
+      this.buffer,
+      this.start,
+      this.end - this.start,
+      this.dict.clone()
+    );
+  }
 }

 class StreamsSequenceStream extends DecodeStream {
@@ -52,6 +52,10 @@ class DecryptStream extends DecodeStream {
     buffer.set(chunk, bufferLength);
     this.bufferLength = newLength;
   }

+  getOriginalStream() {
+    return this;
+  }
 }

 export { DecryptStream };
@@ -178,7 +178,7 @@ class Page {
     );
   }

-  #getBoundingBox(name) {
+  getBoundingBox(name) {
     if (this.xfaData) {
       return this.xfaData.bbox;
     }

@@ -201,7 +201,7 @@ class Page {
     return shadow(
       this,
       "mediaBox",
-      this.#getBoundingBox("MediaBox") || LETTER_SIZE_MEDIABOX
+      this.getBoundingBox("MediaBox") || LETTER_SIZE_MEDIABOX
     );
   }

@@ -210,7 +210,7 @@ class Page {
     return shadow(
       this,
       "cropBox",
-      this.#getBoundingBox("CropBox") || this.mediaBox
+      this.getBoundingBox("CropBox") || this.mediaBox
     );
   }
src/core/editor/pdf_editor.js (new file, 594 lines)
@@ -0,0 +1,594 @@
/* Copyright 2025 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** @typedef {import("../document.js").PDFDocument} PDFDocument */
/** @typedef {import("../document.js").Page} Page */
/** @typedef {import("../xref.js").XRef} XRef */

import { Dict, isName, Ref, RefSetCache } from "../primitives.js";
import { getModificationDate, stringToPDFString } from "../../shared/util.js";
import { incrementalUpdate, writeValue } from "../writer.js";
import { BaseStream } from "../base_stream.js";
import { StringStream } from "../stream.js";
import { stringToAsciiOrUTF16BE } from "../core_utils.js";

const MAX_LEAVES_PER_PAGES_NODE = 16;

class PageData {
  constructor(page, documentData) {
    this.page = page;
    this.documentData = documentData;
    this.annotations = null;

    documentData.pagesMap.put(page.ref, this);
  }
}

class DocumentData {
  constructor(document) {
    this.document = document;
    this.pagesMap = new RefSetCache();
    this.oldRefMapping = new RefSetCache();
  }
}

class PDFEditor {
  constructor({ useObjectStreams = true, title = "", author = "" } = {}) {
    this.hasSingleFile = false;
    this.currentDocument = null;
    this.oldPages = [];
    this.newPages = [];
    this.xref = [null];
    this.newRefCount = 1;
    [this.rootRef, this.rootDict] = this.newDict;
    [this.infoRef, this.infoDict] = this.newDict;
    [this.pagesRef, this.pagesDict] = this.newDict;
    this.namesDict = null;
    this.useObjectStreams = useObjectStreams;
    this.objStreamRefs = useObjectStreams ? new Set() : null;
    this.version = "1.7";
    this.title = title;
    this.author = author;
  }

  /**
   * Get a new reference for an object in the PDF.
   * @returns {Ref}
   */
  get newRef() {
    const ref = Ref.get(this.newRefCount++, 0);
    return ref;
  }

  /**
   * Create a new dictionary and its reference.
   * @returns {[Ref, Dict]}
   */
  get newDict() {
    const ref = this.newRef;
    const dict = (this.xref[ref.num] = new Dict());
    return [ref, dict];
  }

  /**
   * Clone an object in the PDF.
   * @param {*} obj
   * @param {XRef} xref
   * @returns {Promise<Ref>}
   */
  async #cloneObject(obj, xref) {
    const ref = this.newRef;
    this.xref[ref.num] = await this.#collectDependencies(obj, true, xref);
    return ref;
  }

  /**
   * Collect the dependencies of an object and create new references for each
   * dependency.
   * @param {*} obj
   * @param {boolean} mustClone
   * @param {XRef} xref
   * @returns {Promise<*>}
   */
  async #collectDependencies(obj, mustClone, xref) {
    if (obj instanceof Ref) {
      const {
        currentDocument: { oldRefMapping },
      } = this;
      let newRef = oldRefMapping.get(obj);
      if (newRef) {
        return newRef;
      }
      newRef = this.newRef;
      oldRefMapping.put(obj, newRef);
      obj = await xref.fetchAsync(obj);

      if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
        if (
          obj instanceof Dict &&
          isName(obj.get("Type"), "Page") &&
          !this.currentDocument.pagesMap.has(obj)
        ) {
          throw new Error(
            "Adding a deleted page to the document is not supported."
          );
        }
      }

      this.xref[newRef.num] = await this.#collectDependencies(obj, true, xref);
      return newRef;
    }
    const promises = [];
    if (Array.isArray(obj)) {
      if (mustClone) {
        obj = obj.slice();
      }
      for (let i = 0, ii = obj.length; i < ii; i++) {
        promises.push(
          this.#collectDependencies(obj[i], true, xref).then(
            newObj => (obj[i] = newObj)
          )
        );
      }
      await Promise.all(promises);
      return obj;
    }
    let dict;
    if (obj instanceof BaseStream) {
      ({ dict } = obj = obj.getOriginalStream().clone());
    } else if (obj instanceof Dict) {
      if (mustClone) {
        obj = obj.clone();
      }
      dict = obj;
    }
    if (dict) {
      for (const [key, rawObj] of dict.getRawEntries()) {
        promises.push(
          this.#collectDependencies(rawObj, true, xref).then(newObj =>
            dict.set(key, newObj)
          )
        );
      }
      await Promise.all(promises);
    }

    return obj;
  }

  /**
   * @typedef {Object} PageInfo
   * @property {PDFDocument} document
   * @property {Array<Array<number>|number>} [includePages]
   *   included ranges (inclusive) or indices.
   * @property {Array<Array<number>|number>} [excludePages]
   *   excluded ranges (inclusive) or indices.
   */
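  // For example, with a 10-page document:
  //   { document, includePages: [[0, 2], 5] } keeps pages 0, 1, 2 and 5;
  //   { document, excludePages: [[0, 2], 5] } keeps pages 3, 4 and 6 to 9.
  // Ranges are inclusive, indices are 0-based, and when neither list is
  // given every page of the document is kept.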
  /**
   * Extract pages from the given documents.
   * @param {Array<PageInfo>} pageInfos
   * @return {Promise<void>}
   */
  async extractPages(pageInfos) {
    const promises = [];
    let newIndex = 0;
    this.hasSingleFile = pageInfos.length === 1;
    for (const { document, includePages, excludePages } of pageInfos) {
      if (!document) {
        continue;
      }
      const documentData = new DocumentData(document);
      promises.push(this.#collectDocumentData(documentData));
      let keptIndices, keptRanges, deletedIndices, deletedRanges;
      for (const page of includePages || []) {
        if (Array.isArray(page)) {
          (keptRanges ||= []).push(page);
        } else {
          (keptIndices ||= new Set()).add(page);
        }
      }
      for (const page of excludePages || []) {
        if (Array.isArray(page)) {
          (deletedRanges ||= []).push(page);
        } else {
          (deletedIndices ||= new Set()).add(page);
        }
      }
      for (let i = 0, ii = document.numPages; i < ii; i++) {
        if (deletedIndices?.has(i)) {
          continue;
        }
        if (deletedRanges) {
          let isDeleted = false;
          for (const [start, end] of deletedRanges) {
            if (i >= start && i <= end) {
              isDeleted = true;
              break;
            }
          }
          if (isDeleted) {
            continue;
          }
        }

        let takePage = false;
        if (keptIndices) {
          takePage = keptIndices.has(i);
        }
        if (!takePage && keptRanges) {
          for (const [start, end] of keptRanges) {
            if (i >= start && i <= end) {
              takePage = true;
              break;
            }
          }
        }
        if (!takePage && !keptIndices && !keptRanges) {
          takePage = true;
        }
        if (!takePage) {
          continue;
        }
        const newPageIndex = newIndex++;
        promises.push(
          document.getPage(i).then(page => {
            this.oldPages[newPageIndex] = new PageData(page, documentData);
          })
        );
      }
    }
    await Promise.all(promises);
    promises.length = 0;

    for (const page of this.oldPages) {
      promises.push(this.#postCollectPageData(page));
    }
    await Promise.all(promises);

    for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
      this.newPages[i] = await this.#makePageCopy(i, null);
    }

    return this.writePDF();
  }

  /**
   * Collect the document data.
   * @param {DocumentData} documentData
   * @return {Promise<void>}
   */
  async #collectDocumentData(documentData) {}

  /**
   * Post process the collected page data.
   * @param {PageData} pageData
   * @returns {Promise<void>}
   */
  async #postCollectPageData(pageData) {
    const {
      page: { xref, annotations },
    } = pageData;

    if (!annotations) {
      return;
    }

    const promises = [];
    let newAnnotations = [];
    let newIndex = 0;

    // TODO: remove only links to deleted pages.
    for (const annotationRef of annotations) {
      const newAnnotationIndex = newIndex++;
      promises.push(
        xref.fetchIfRefAsync(annotationRef).then(async annotationDict => {
          if (!isName(annotationDict.get("Subtype"), "Link")) {
            newAnnotations[newAnnotationIndex] = annotationRef;
          }
        })
      );
    }
    await Promise.all(promises);
    newAnnotations = newAnnotations.filter(annot => !!annot);
    pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
  }

  /**
   * Create a copy of a page.
   * @param {number} pageIndex
   * @returns {Promise<Ref>} the page reference in the new PDF document.
   */
  async #makePageCopy(pageIndex) {
    const { page, documentData, annotations } = this.oldPages[pageIndex];
    this.currentDocument = documentData;
    const { oldRefMapping } = documentData;
    const { xref, rotate, mediaBox, resources, ref: oldPageRef } = page;
    const pageRef = this.newRef;
    const pageDict = (this.xref[pageRef.num] = page.pageDict.clone());
    oldRefMapping.put(oldPageRef, pageRef);

    // No need to keep these entries as we'll set them again later.
    for (const key of [
      "Rotate",
      "MediaBox",
      "CropBox",
      "BleedBox",
      "TrimBox",
      "ArtBox",
      "Resources",
      "Annots",
      "Parent",
      "UserUnit",
    ]) {
      pageDict.delete(key);
    }

    const lastRef = this.newRefCount;
    await this.#collectDependencies(pageDict, false, xref);

    pageDict.set("Rotate", rotate);
    pageDict.set("MediaBox", mediaBox);
    for (const boxName of ["CropBox", "BleedBox", "TrimBox", "ArtBox"]) {
      const box = page.getBoundingBox(boxName);
      if (box?.some((value, index) => value !== mediaBox[index])) {
        // These boxes are optional and their default value is the MediaBox.
        pageDict.set(boxName, box);
      }
    }
    const userUnit = page.userUnit;
    if (userUnit !== 1) {
      pageDict.set("UserUnit", userUnit);
    }
    pageDict.setIfDict(
      "Resources",
      await this.#collectDependencies(resources, true, xref)
    );
    pageDict.setIfArray(
      "Annots",
      await this.#collectDependencies(annotations, true, xref)
    );

    if (this.useObjectStreams) {
      const newLastRef = this.newRefCount;
      const pageObjectRefs = [];
      for (let i = lastRef; i < newLastRef; i++) {
        const obj = this.xref[i];
        if (obj instanceof BaseStream) {
          continue;
        }
        pageObjectRefs.push(Ref.get(i, 0));
      }
      for (let i = 0; i < pageObjectRefs.length; i += 0xffff) {
        const objStreamRef = this.newRef;
        this.objStreamRefs.add(objStreamRef.num);
        this.xref[objStreamRef.num] = pageObjectRefs.slice(i, i + 0xffff);
      }
    }

    this.currentDocument = null;

    return pageRef;
  }

  /**
   * Create the page tree structure.
   */
  #makePageTree() {
    const { newPages: pages, rootDict, pagesRef, pagesDict } = this;
    rootDict.set("Pages", pagesRef);
    pagesDict.setIfName("Type", "Pages");
    pagesDict.set("Count", pages.length);

    const maxLeaves =
      MAX_LEAVES_PER_PAGES_NODE <= 1 ? pages.length : MAX_LEAVES_PER_PAGES_NODE;
    const stack = [{ dict: pagesDict, kids: pages, parentRef: pagesRef }];

    while (stack.length > 0) {
      const { dict, kids, parentRef } = stack.pop();
      if (kids.length <= maxLeaves) {
        dict.set("Kids", kids);
        for (const ref of kids) {
          this.xref[ref.num].set("Parent", parentRef);
        }
        continue;
      }
      const chunkSize = Math.max(maxLeaves, Math.ceil(kids.length / maxLeaves));
      const kidsChunks = [];
      for (let i = 0; i < kids.length; i += chunkSize) {
        kidsChunks.push(kids.slice(i, i + chunkSize));
      }
      const kidsRefs = [];
      dict.set("Kids", kidsRefs);
      for (const chunk of kidsChunks) {
        const [kidRef, kidDict] = this.newDict;
        kidsRefs.push(kidRef);
        kidDict.setIfName("Type", "Pages");
        kidDict.set("Parent", parentRef);
        kidDict.set("Count", chunk.length);
        stack.push({ dict: kidDict, kids: chunk, parentRef: kidRef });
      }
    }
  }
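  // A worked example of the chunking above: with MAX_LEAVES_PER_PAGES_NODE
  // = 16 and 100 pages, chunkSize = Math.max(16, Math.ceil(100 / 16)) = 16,
  // so the root gets seven intermediate /Pages nodes holding 16, 16, 16, 16,
  // 16, 16 and 4 kids; each chunk then fits the leaf limit, giving a
  // two-level tree.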
  /**
   * Create the root dictionary.
   * @returns {Promise<void>}
   */
  async #makeRoot() {
    const { rootDict } = this;
    rootDict.setIfName("Type", "Catalog");
    rootDict.set("Version", this.version);
    this.#makePageTree();
  }

  /**
   * Create the info dictionary.
   * @returns {Map} infoMap
   */
  #makeInfo() {
    const infoMap = new Map();
    if (this.hasSingleFile) {
      const {
        xref: { trailer },
      } = this.oldPages[0].documentData.document;
      const oldInfoDict = trailer.get("Info");
      for (const [key, value] of oldInfoDict || []) {
        if (typeof value === "string") {
          infoMap.set(key, stringToPDFString(value));
        }
      }
    }
    infoMap.delete("ModDate");
    infoMap.set("CreationDate", getModificationDate());
    infoMap.set("Creator", "PDF.js");
    infoMap.set("Producer", "Firefox");

    if (this.author) {
      infoMap.set("Author", this.author);
    }
    if (this.title) {
      infoMap.set("Title", this.title);
    }
    for (const [key, value] of infoMap) {
      this.infoDict.set(key, stringToAsciiOrUTF16BE(value));
    }
    return infoMap;
  }

  /**
   * Create the encryption dictionary if required.
   * @returns {Promise<[Dict|null, CipherTransformFactory|null, Array|null]>}
   */
  async #makeEncrypt() {
    if (!this.hasSingleFile) {
      return [null, null, null];
    }
    const { documentData } = this.oldPages[0];
    const {
      document: {
        xref: { trailer, encrypt },
      },
    } = documentData;
    if (!trailer.has("Encrypt")) {
      return [null, null, null];
    }
    const encryptDict = trailer.get("Encrypt");
    if (!(encryptDict instanceof Dict)) {
      return [null, null, null];
    }
    this.currentDocument = documentData;
    const result = [
      await this.#cloneObject(encryptDict, trailer.xref),
      encrypt,
      trailer.get("ID"),
    ];
    this.currentDocument = null;
    return result;
  }

  /**
   * Create the changes required to write the new PDF document.
   * @returns {Promise<[RefSetCache, Ref]>}
   */
  async #createChanges() {
    const changes = new RefSetCache();
    changes.put(Ref.get(0, 0xffff), { data: null });
    for (let i = 1, ii = this.xref.length; i < ii; i++) {
      if (this.objStreamRefs?.has(i)) {
        await this.#createObjectStream(Ref.get(i, 0), this.xref[i], changes);
      } else {
        changes.put(Ref.get(i, 0), { data: this.xref[i] });
      }
    }

    return [changes, this.newRef];
  }

  /**
   * Create an object stream containing the given objects.
   * @param {Ref} objStreamRef
   * @param {Array<Ref>} objRefs
   * @param {RefSetCache} changes
   */
  async #createObjectStream(objStreamRef, objRefs, changes) {
    const streamBuffer = [""];
    const objOffsets = [];
    let offset = 0;
    const buffer = [];
    for (let i = 0, ii = objRefs.length; i < ii; i++) {
      const objRef = objRefs[i];
      changes.put(objRef, { data: null, objStreamRef, index: i });
      objOffsets.push(`${objRef.num} ${offset}`);
      const data = this.xref[objRef.num];
      await writeValue(data, buffer, /* transform = */ null);
      const obj = buffer.join("");
      buffer.length = 0;
      streamBuffer.push(obj);
      offset += obj.length + 1;
    }
    streamBuffer[0] = objOffsets.join("\n");
    const objStream = new StringStream(streamBuffer.join("\n"));
    const objStreamDict = (objStream.dict = new Dict());
    objStreamDict.setIfName("Type", "ObjStm");
    objStreamDict.set("N", objRefs.length);
    objStreamDict.set("First", streamBuffer[0].length + 1);

    changes.put(objStreamRef, { data: objStream });
  }
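  // The stream built above follows the ObjStm layout of the PDF spec
  // (section 7.5.7); for three objects it would look roughly like:
  //   12 0
  //   13 24
  //   14 51
  //   <</Type /Font ...>> ...the objects themselves, newline-separated...
  // where each "num offset" pair is relative to /First, /N is 3 and /First
  // is the length of the offset table plus one.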
  /**
   * Write the new PDF document to a Uint8Array.
   * @returns {Promise<Uint8Array>}
   */
  async writePDF() {
    await this.#makeRoot();
    const infoMap = this.#makeInfo();
    const [encryptRef, encrypt, fileIds] = await this.#makeEncrypt();
    const [changes, xrefTableRef] = await this.#createChanges();

    // Create the PDF header in order to help sniffers.
    // PDF version must be in the range 1.0 to 1.7 inclusive.
    // We add a binary comment line to ensure that the file is treated
    // as a binary file by applications that open it.
    const header = [
      ...`%PDF-${this.version}\n%`.split("").map(c => c.charCodeAt(0)),
      0xfa,
      0xde,
      0xfa,
      0xce,
    ];
    return incrementalUpdate({
      originalData: new Uint8Array(header),
      changes,
      xrefInfo: {
        startXRef: null,
        rootRef: this.rootRef,
        infoRef: this.infoRef,
        encryptRef,
        newRef: xrefTableRef,
        fileIds: fileIds || [null, null],
        infoMap,
      },
      useXrefStream: this.useObjectStreams,
      xref: {
        encrypt,
        encryptRef,
      },
    });
  }
}

export { PDFEditor };
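For reference, a sketch of how this class is exercised by the worker (see the ExtractPages handler below); on this side of the API the document entries are core PDFDocument instances, not raw bytes, and pdfDocumentA/pdfDocumentB are hypothetical names:

  const editor = new PDFEditor({ title: "Merged", author: "Me" });
  const bytes = await editor.extractPages([
    { document: pdfDocumentA },                    // all pages
    { document: pdfDocumentB, excludePages: [0] }, // drop its first page
  ]);
  // `bytes` is the Uint8Array produced by writePDF() via incrementalUpdate().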
@@ -188,6 +188,10 @@ class Dict {
     return [...this._map.values()];
   }

+  getRawEntries() {
+    return this._map.entries();
+  }
+
   set(key, value) {
     if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
       if (typeof key !== "string") {

@@ -231,6 +235,12 @@
     }
   }

+  setIfDict(key, value) {
+    if (value instanceof Dict) {
+      this.set(key, value);
+    }
+  }
+
   has(key) {
     return this._map.has(key);
   }
@@ -82,6 +82,15 @@ class Stream extends BaseStream {
   makeSubStream(start, length, dict = null) {
     return new Stream(this.bytes.buffer, start, length, dict);
   }

+  clone() {
+    return new Stream(
+      this.bytes.buffer,
+      this.start,
+      this.end - this.start,
+      this.dict.clone()
+    );
+  }
 }

 class StringStream extends Stream {
@@ -36,6 +36,7 @@ import { MessageHandler, wrapReason } from "../shared/message_handler.js";
 import { AnnotationFactory } from "./annotation.js";
 import { clearGlobalCaches } from "./cleanup_helper.js";
 import { incrementalUpdate } from "./writer.js";
+import { PDFEditor } from "./editor/pdf_editor.js";
 import { PDFWorkerStream } from "./worker_stream.js";
 import { StructTreeRoot } from "./struct_tree.js";
@@ -557,6 +558,97 @@ class WorkerMessageHandler {
       return pdfManager.ensureDoc("calculationOrderIds");
     });

+    handler.on("ExtractPages", async function ({ pageInfos }) {
+      if (!pageInfos) {
+        warn("extractPages: nothing to extract.");
+        return null;
+      }
+      if (!Array.isArray(pageInfos)) {
+        pageInfos = [pageInfos];
+      }
+      let newDocumentId = 0;
+      for (const pageInfo of pageInfos) {
+        if (pageInfo.document === null) {
+          pageInfo.document = pdfManager.pdfDocument;
+        } else if (ArrayBuffer.isView(pageInfo.document)) {
+          const manager = new LocalPdfManager({
+            source: pageInfo.document,
+            docId: `${docId}_extractPages_${newDocumentId++}`,
+            handler,
+            password: pageInfo.password ?? null,
+            evaluatorOptions: Object.assign({}, pdfManager.evaluatorOptions),
+          });
+          let recoveryMode = false;
+          let isValid = true;
+          while (true) {
+            try {
+              await manager.requestLoadedStream();
+              await manager.ensureDoc("checkHeader");
+              await manager.ensureDoc("parseStartXRef");
+              await manager.ensureDoc("parse", [recoveryMode]);
+              break;
+            } catch (e) {
+              if (e instanceof XRefParseException) {
+                if (recoveryMode === false) {
+                  recoveryMode = true;
+                  continue;
+                } else {
+                  isValid = false;
+                  warn("extractPages: XRefParseException.");
+                }
+              } else if (e instanceof PasswordException) {
+                const task = new WorkerTask(
+                  `PasswordException: response ${e.code}`
+                );
+
+                startWorkerTask(task);
+
+                try {
+                  const { password } = await handler.sendWithPromise(
+                    "PasswordRequest",
+                    e
+                  );
+                  manager.updatePassword(password);
+                } catch {
+                  isValid = false;
+                  warn("extractPages: invalid password.");
+                } finally {
+                  finishWorkerTask(task);
+                }
+              } else {
+                isValid = false;
+                warn("extractPages: invalid document.");
+              }
+              if (!isValid) {
+                break;
+              }
+            }
+          }
+          if (!isValid) {
+            pageInfo.document = null;
+          }
+          const isPureXfa = await manager.ensureDoc("isPureXfa");
+          if (isPureXfa) {
+            pageInfo.document = null;
+            warn("extractPages does not support pure XFA documents.");
+          } else {
+            pageInfo.document = manager.pdfDocument;
+          }
+        } else {
+          warn("extractPages: invalid document.");
+        }
+      }
+      try {
+        const pdfEditor = new PDFEditor();
+        const buffer = await pdfEditor.extractPages(pageInfos);
+        return buffer;
+      } catch (reason) {
+        // eslint-disable-next-line no-console
+        console.error(reason);
+        return null;
+      }
+    });
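Note the password round-trip above: when a merged input is encrypted, the worker re-dispatches a PasswordRequest to the main thread, so API consumers can also supply the password up front, as in the unit test further down (encryptedPdfData here stands for the fetched bytes of an encrypted file):

  const data = await pdfDoc.extractPages([
    { document: null, includePages: [0] },
    { document: encryptedPdfData, password: "asdfasdf" },
  ]);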
     handler.on(
       "SaveDocument",
       async function ({ isPureXfa, numPages, annotationStorage, filename }) {
@@ -19,7 +19,6 @@ import {
   escapePDFName,
   escapeString,
   getSizeInBytes,
-  numberToString,
   parseXFAPath,
 } from "./core_utils.js";
 import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";

@@ -27,29 +26,34 @@ import { Stream, StringStream } from "./stream.js";
 import { BaseStream } from "./base_stream.js";
 import { calculateMD5 } from "./calculate_md5.js";

-async function writeObject(ref, obj, buffer, { encrypt = null }) {
-  const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
+async function writeObject(
+  ref,
+  obj,
+  buffer,
+  { encrypt = null, encryptRef = null }
+) {
+  // Avoid encrypting the encrypt dictionary.
+  const transform =
+    encrypt && encryptRef !== ref
+      ? encrypt.createCipherTransform(ref.num, ref.gen)
+      : null;
   buffer.push(`${ref.num} ${ref.gen} obj\n`);
-  if (obj instanceof Dict) {
-    await writeDict(obj, buffer, transform);
-  } else if (obj instanceof BaseStream) {
-    await writeStream(obj, buffer, transform);
-  } else if (Array.isArray(obj) || ArrayBuffer.isView(obj)) {
-    await writeArray(obj, buffer, transform);
-  }
+  await writeValue(obj, buffer, transform);
   buffer.push("\nendobj\n");
 }

 async function writeDict(dict, buffer, transform) {
   buffer.push("<<");
-  for (const key of dict.getKeys()) {
+  for (const [key, rawObj] of dict.getRawEntries()) {
     buffer.push(` /${escapePDFName(key)} `);
-    await writeValue(dict.getRaw(key), buffer, transform);
+    await writeValue(rawObj, buffer, transform);
   }
   buffer.push(">>");
 }

 async function writeStream(stream, buffer, transform) {
+  stream = stream.getOriginalStream();
   stream.reset();
   let bytes = stream.getBytes();
   const { dict } = stream;
@@ -67,7 +71,7 @@ async function writeStream(stream, buffer, transform) {
   // The number 256 is arbitrary, but it should be reasonable.
   const MIN_LENGTH_FOR_COMPRESSING = 256;

-  if (bytes.length >= MIN_LENGTH_FOR_COMPRESSING || isFilterZeroFlateDecode) {
+  if (bytes.length >= MIN_LENGTH_FOR_COMPRESSING && !isFilterZeroFlateDecode) {
     try {
       const cs = new CompressionStream("deflate");
       const writer = cs.writable.getWriter();
@@ -120,14 +124,11 @@ async function writeStream(stream, buffer, transform) {

 async function writeArray(array, buffer, transform) {
   buffer.push("[");
-  let first = true;
-  for (const val of array) {
-    if (!first) {
+  for (let i = 0, ii = array.length; i < ii; i++) {
+    await writeValue(array[i], buffer, transform);
+    if (i < ii - 1) {
       buffer.push(" ");
-    } else {
-      first = false;
     }
-    await writeValue(val, buffer, transform);
   }
   buffer.push("]");
 }
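A quick hand-trace of the rewritten loop (an illustration, not code from the patch): writeArray([1.23, 4.5, 6], buffer, null) pushes "[", "1.23", " ", "4.5", " ", "6" and "]", so the output is "[1.23 4.5 6]" with single separating spaces and no trailing one, exactly the shape the writer unit test at the end of this diff expects.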
@@ -145,7 +146,11 @@ async function writeValue(value, buffer, transform) {
     }
     buffer.push(`(${escapeString(value)})`);
   } else if (typeof value === "number") {
-    buffer.push(numberToString(value));
+    // Don't try to round numbers in general, it could lead to degenerate
+    // matrices (e.g. [0.000008 0 0 0.000008 0 0]).
+    // The numbers must be "rounded" only when pdf.js is producing them and the
+    // current transformation matrix is well known.
+    buffer.push(value.toString());
   } else if (typeof value === "boolean") {
     buffer.push(value.toString());
   } else if (value instanceof Dict) {
@@ -306,7 +311,7 @@ async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
   }
   computeIDs(baseOffset, xrefInfo, newXref);
   buffer.push("trailer\n");
-  await writeDict(newXref, buffer);
+  await writeDict(newXref, buffer, null);
   buffer.push("\nstartxref\n", baseOffset.toString(), "\n%%EOF\n");
 }
@@ -332,10 +337,17 @@ async function getXRefStreamTable(
   const xrefTableData = [];
   let maxOffset = 0;
   let maxGen = 0;
-  for (const { ref, data } of newRefs) {
+  for (const { ref, data, objStreamRef, index } of newRefs) {
+    let gen;
     maxOffset = Math.max(maxOffset, baseOffset);
-    if (data !== null) {
+    // The first number in each entry is the type (see 7.5.8.3):
+    //  0: free object
+    //  1: in-use object
+    //  2: compressed object
+    if (objStreamRef) {
+      gen = index;
+      xrefTableData.push([2, objStreamRef.num, gen]);
+    } else if (data !== null) {
       gen = Math.min(ref.gen, 0xffff);
       xrefTableData.push([1, baseOffset, gen]);
       baseOffset += data.length;
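A hand-worked illustration of the entry types (per section 7.5.8.3 cited above): an object stored at index 3 inside object stream number 20 gets the entry [2, 20, 3], so its "generation" field is really its position within the stream, which is why gen = index here; a regular object written at byte offset 1234 with generation 0 gets [1, 1234, 0]; and the free-list head added in #createChanges is the single type-0 entry.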
@@ -371,13 +383,13 @@
 function computeIDs(baseOffset, xrefInfo, newXref) {
   if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
     const md5 = computeMD5(baseOffset, xrefInfo);
-    newXref.set("ID", [xrefInfo.fileIds[0], md5]);
+    newXref.set("ID", [xrefInfo.fileIds[0] || md5, md5]);
   }
 }

 function getTrailerDict(xrefInfo, changes, useXrefStream) {
   const newXref = new Dict(null);
-  newXref.set("Prev", xrefInfo.startXRef);
+  newXref.setIfDefined("Prev", xrefInfo?.startXRef);
   const refForXrefTable = xrefInfo.newRef;
   if (useXrefStream) {
     changes.put(refForXrefTable, { data: "" });
@@ -386,21 +398,20 @@ function getTrailerDict(xrefInfo, changes, useXrefStream) {
   } else {
     newXref.set("Size", refForXrefTable.num);
   }
-  if (xrefInfo.rootRef !== null) {
-    newXref.set("Root", xrefInfo.rootRef);
-  }
-  if (xrefInfo.infoRef !== null) {
-    newXref.set("Info", xrefInfo.infoRef);
-  }
-  if (xrefInfo.encryptRef !== null) {
-    newXref.set("Encrypt", xrefInfo.encryptRef);
-  }
+  newXref.setIfDefined("Root", xrefInfo?.rootRef);
+  newXref.setIfDefined("Info", xrefInfo?.infoRef);
+  newXref.setIfDefined("Encrypt", xrefInfo?.encryptRef);

   return newXref;
 }

 async function writeChanges(changes, xref, buffer = []) {
   const newRefs = [];
-  for (const [ref, { data }] of changes.items()) {
+  for (const [ref, { data, objStreamRef, index }] of changes.items()) {
+    if (objStreamRef) {
+      newRefs.push({ ref, data, objStreamRef, index });
+      continue;
+    }
     if (data === null || typeof data === "string") {
       newRefs.push({ ref, data });
       continue;
@@ -483,4 +494,4 @@ async function incrementalUpdate({
   return array;
 }

-export { incrementalUpdate, writeChanges, writeDict, writeObject };
+export { incrementalUpdate, writeChanges, writeDict, writeObject, writeValue };
@@ -1025,6 +1025,24 @@ class PDFDocumentProxy {
     return this._transport.saveDocument();
   }

+  /**
+   * @typedef {Object} PageInfo
+   * @property {null|Uint8Array} document
+   * @property {Array<Array<number>|number>} [includePages]
+   *   included ranges or indices.
+   * @property {Array<Array<number>|number>} [excludePages]
+   *   excluded ranges or indices.
+   */
+
+  /**
+   * @param {Array<PageInfo>} pageInfos - The pages to extract.
+   * @returns {Promise<Uint8Array>} A promise that is resolved with a
+   *   {Uint8Array} containing the full data of the saved document.
+   */
+  extractPages(pageInfos) {
+    return this._transport.extractPages(pageInfos);
+  }
+
   /**
    * @returns {Promise<{ length: number }>} A promise that is resolved when the
    *   document's data is loaded. It is resolved with an {Object} that contains
@@ -2900,6 +2918,10 @@ class WorkerTransport {
     });
   }

+  extractPages(pageInfos) {
+    return this.messageHandler.sendWithPromise("ExtractPages", { pageInfos });
+  }
+
   getPage(pageNumber) {
     if (
       !Number.isInteger(pageNumber) ||
@@ -506,6 +506,7 @@ class Driver {
     this.inFlightRequests = 0;
     this.testFilter = JSON.parse(params.get("testfilter") || "[]");
     this.xfaOnly = params.get("xfaonly") === "true";
+    this.masterMode = params.get("mastermode") === "true";

     // Create a working canvas
     this.canvas = document.createElement("canvas");

@@ -591,6 +592,25 @@ class Driver {
       task.stats = { times: [] };
       task.enableXfa = task.enableXfa === true;

+      if (task.includePages && task.type === "extract") {
+        if (this.masterMode) {
+          const includePages = [];
+          for (const page of task.includePages) {
+            if (Array.isArray(page)) {
+              for (let i = page[0]; i <= page[1]; i++) {
+                includePages.push(i);
+              }
+            } else {
+              includePages.push(page);
+            }
+          }
+          task.numberOfTasks = includePages.length;
+          task.includePages = includePages;
+        } else {
+          delete task.pageMapping;
+        }
+      }

       const prevFile = md5FileMap.get(task.md5);
       if (prevFile) {
         if (task.file !== prevFile) {

@@ -658,6 +678,20 @@ class Driver {
       });
       let promise = loadingTask.promise;

+      if (!this.masterMode && task.type === "extract") {
+        promise = promise.then(async doc => {
+          const data = await doc.extractPages([
+            {
+              document: null,
+              includePages: task.includePages,
+            },
+          ]);
+          await loadingTask.destroy();
+          delete task.includePages;
+          return getDocument(data).promise;
+        });
+      }

       if (task.annotationStorage) {
         for (const annotation of Object.values(task.annotationStorage)) {
           const { bitmapName, quadPoints, paths, outlines } = annotation;

@@ -862,7 +896,12 @@ class Driver {
       }
     }

-    if (task.skipPages?.includes(task.pageNum)) {
+    if (
+      task.skipPages?.includes(task.pageNum) ||
+      (this.masterMode &&
+        task.includePages &&
+        !task.includePages.includes(task.pageNum - 1))
+    ) {
       this._log(
         ` Skipping page ${task.pageNum}/${task.pdfDoc.numPages}...\n`
       );

@@ -1274,10 +1313,11 @@ class Driver {
       id: task.id,
       numPages: task.pdfDoc ? task.lastPage || task.pdfDoc.numPages : 0,
       lastPageNum: this._getLastPageNumber(task),
+      numberOfTasks: task.numberOfTasks ?? -1,
       failure,
       file: task.file,
       round: task.round,
-      page: task.pageNum,
+      page: task.pageMapping?.[task.pageNum] ?? task.pageNum,
       snapshot,
       baselineSnapshot,
       stats: task.stats.times,
test/pdfs/.gitignore (vendored, 3 lines added)
@@ -754,3 +754,6 @@
 !bug1937438_from_word.pdf
 !bug1937438_mml_from_latex.pdf
 !bug1997343.pdf
+!doc_1_3_pages.pdf
+!doc_2_3_pages.pdf
+!doc_3_3_pages.pdf

test/pdfs/doc_1_3_pages.pdf (BIN, new executable file; binary file not shown)
test/pdfs/doc_2_3_pages.pdf (BIN, new executable file; binary file not shown)
test/pdfs/doc_3_3_pages.pdf (BIN, new executable file; binary file not shown)
@@ -672,6 +672,7 @@ function checkRefTestResults(browser, id, results) {
       case "partial":
       case "text":
       case "highlight":
+      case "extract":
         checkEq(task, results, browser, session.masterMode);
         break;
       case "fbf":

@@ -731,6 +732,7 @@ function refTestPostHandler(parsedUrl, req, res) {
   var snapshot = data.snapshot;
   var baselineSnapshot = data.baselineSnapshot;
   var lastPageNum = data.lastPageNum;
+  var numberOfTasks = data.numberOfTasks;

   session = getSession(browser);
   monitorBrowserTimeout(session, handleSessionTimeout);

@@ -773,7 +775,10 @@ function refTestPostHandler(parsedUrl, req, res) {
     });
   }

-  var isDone = taskResults.at(-1)?.[lastPageNum - 1];
+  const lastTaskResults = taskResults.at(-1);
+  const isDone =
+    lastTaskResults?.[lastPageNum - 1] ||
+    lastTaskResults?.filter(result => !!result).length === numberOfTasks;
   if (isDone) {
     checkRefTestResults(browser, id, taskResults);
     session.remaining--;
@@ -13049,5 +13049,23 @@
       "rotation": 0
     }
   }
 },
+{
+  "id": "tracemonkey-extract_0_2_12",
+  "file": "pdfs/tracemonkey.pdf",
+  "md5": "9a192d8b1a7dc652a19835f6f08098bd",
+  "rounds": 1,
+  "type": "extract",
+  "includePages": [0, 2, 12],
+  "pageMapping": { "1": 1, "3": 2, "13": 3 }
+},
+{
+  "id": "bug900822-encrypted-extract_0",
+  "file": "pdfs/bug900822.pdf",
+  "md5": "70e2a3c5922574eeda169c955cf9d084",
+  "rounds": 1,
+  "type": "extract",
+  "includePages": [0],
+  "pageMapping": { "1": 1 }
+}
 ]
@@ -5335,4 +5335,212 @@ deployment as easy as distributing a source file. They are used for
 small scripts as well as for`);
     });
   });

+  describe("PDF page editing", function () {
+    describe("Merge pdfs", function () {
+      it("should merge three PDFs", async function () {
+        const loadingTask = getDocument(
+          buildGetDocumentParams("doc_1_3_pages.pdf")
+        );
+        const pdfDoc = await loadingTask.promise;
+        const pdfData2 = await DefaultFileReaderFactory.fetch({
+          path: TEST_PDFS_PATH + "doc_2_3_pages.pdf",
+        });
+        const pdfData3 = await DefaultFileReaderFactory.fetch({
+          path: TEST_PDFS_PATH + "doc_3_3_pages.pdf",
+        });
+
+        let data = await pdfDoc.extractPages([
+          { document: null },
+          { document: pdfData2 },
+          { document: pdfData3 },
+        ]);
+        let newLoadingTask = getDocument(data);
+        let newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(9);
+
+        for (let i = 1; i <= 9; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(
+            `Document ${Math.ceil(i / 3)}:Page ${((i - 1) % 3) + 1}`
+          );
+        }
+        await newLoadingTask.destroy();
+
+        data = await pdfDoc.extractPages([
+          { document: pdfData3 },
+          { document: pdfData2 },
+          { document: null },
+        ]);
+        newLoadingTask = getDocument(data);
+        newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(9);
+        for (let i = 1; i <= 9; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(
+            `Document ${Math.ceil((10 - i) / 3)}:Page ${((i - 1) % 3) + 1}`
+          );
+        }
+        await newLoadingTask.destroy();
+
+        data = await pdfDoc.extractPages([
+          { document: null, includePages: [0] },
+          { document: pdfData2, includePages: [0] },
+          { document: pdfData3, includePages: [0] },
+        ]);
+        newLoadingTask = getDocument(data);
+        newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(3);
+        for (let i = 1; i <= 3; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(`Document ${i}:Page 1`);
+        }
+        await newLoadingTask.destroy();
+
+        data = await pdfDoc.extractPages([
+          { document: null, excludePages: [0] },
+          { document: pdfData2, excludePages: [0] },
+          { document: pdfData3, excludePages: [0] },
+        ]);
+        newLoadingTask = getDocument(data);
+        newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(6);
+        for (let i = 1; i <= 6; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(
+            `Document ${Math.ceil(i / 2)}:Page ${((i - 1) % 2) + 2}`
+          );
+        }
+        await newLoadingTask.destroy();
+
+        await loadingTask.destroy();
+      });
+
+      it("should merge two PDFs with page included ranges", async function () {
+        const loadingTask = getDocument(
+          buildGetDocumentParams("tracemonkey.pdf")
+        );
+        const pdfDoc = await loadingTask.promise;
+        const pdfData1 = await DefaultFileReaderFactory.fetch({
+          path: TEST_PDFS_PATH + "doc_1_3_pages.pdf",
+        });
+
+        const data = await pdfDoc.extractPages([
+          { document: pdfData1, includePages: [[0, 0], 2] },
+          { document: null, includePages: [[2, 4], 7] },
+        ]);
+        const newLoadingTask = getDocument(data);
+        const newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(6);
+
+        for (let i = 1; i <= 2; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(`Document 1:Page ${2 * i - 1}`);
+        }
+
+        const expectedPagesText = [
+          "v0 := ld s",
+          "i=4. On th",
+          "resentatio",
+          "5.1 Optimi",
+        ];
+        for (let i = 3; i <= 6; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          const text = mergeText(textItems);
+          expect(text.substring(0, 10)).toEqual(expectedPagesText[i - 3]);
+        }
+
+        await newLoadingTask.destroy();
+        await loadingTask.destroy();
+      });
+
+      it("should merge two PDFs with page excluded ranges", async function () {
+        const loadingTask = getDocument(
+          buildGetDocumentParams("tracemonkey.pdf")
+        );
+        const pdfDoc = await loadingTask.promise;
+        const pdfData1 = await DefaultFileReaderFactory.fetch({
+          path: TEST_PDFS_PATH + "doc_1_3_pages.pdf",
+        });
+
+        const data = await pdfDoc.extractPages([
+          { document: pdfData1, excludePages: [[1, 1]] },
+          {
+            document: null,
+            excludePages: [
+              [0, 1],
+              [5, 6],
+              [8, 13],
+            ],
+          },
+        ]);
+        const newLoadingTask = getDocument(data);
+        const newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(6);
+
+        for (let i = 1; i <= 2; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(`Document 1:Page ${2 * i - 1}`);
+        }
+
+        const expectedPagesText = [
+          "v0 := ld s",
+          "i=4. On th",
+          "resentatio",
+          "5.1 Optimi",
+        ];
+        for (let i = 3; i <= 6; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          const text = mergeText(textItems);
+          expect(text.substring(0, 10)).toEqual(expectedPagesText[i - 3]);
+        }
+
+        await newLoadingTask.destroy();
+        await loadingTask.destroy();
+      });
+
+      it("should merge two PDFs with one with a password", async function () {
+        const loadingTask = getDocument(
+          buildGetDocumentParams("doc_1_3_pages.pdf")
+        );
+        const pdfDoc = await loadingTask.promise;
+        const pdfData1 = await DefaultFileReaderFactory.fetch({
+          path: TEST_PDFS_PATH + "pr6531_2.pdf",
+        });
+
+        const data = await pdfDoc.extractPages([
+          { document: null, includePages: [0] },
+          { document: pdfData1, password: "asdfasdf" },
+        ]);
+        const newLoadingTask = getDocument(data);
+        const newPdfDoc = await newLoadingTask.promise;
+        expect(newPdfDoc.numPages).toEqual(2);
+
+        const expectedPagesText = ["Document 1:Page 1", ""];
+        for (let i = 1; i <= 2; i++) {
+          const pdfPage = await newPdfDoc.getPage(i);
+          const { items: textItems } = await pdfPage.getTextContent();
+          expect(mergeText(textItems)).toEqual(expectedPagesText[i - 1]);
+        }
+
+        const page2 = await newPdfDoc.getPage(2);
+        const annots = await page2.getAnnotations();
+        expect(annots.length).toEqual(1);
+        expect(annots[0].contentsObj.str).toEqual(
+          "Bluebeam should be encrypting this."
+        );
+
+        await newLoadingTask.destroy();
+        await loadingTask.destroy();
+      });
+    });
+  });
 });
@@ -310,6 +310,16 @@ describe("primitives", function () {
     expect(rawValues2.sort()).toEqual(expectedRawValues2);
   });

+  it("should get all raw entries", function () {
+    const expectedRawEntries = [
+      ["FontFile", testFontFile],
+      ["FontFile2", testFontFile2],
+      ["FontFile3", testFontFile3],
+    ];
+    const rawEntries = Array.from(dictWithManyKeys.getRawEntries());
+    expect(rawEntries.sort()).toEqual(expectedRawEntries);
+  });
+
   it("should create only one object for Dict.empty", function () {
     const firstDictEmpty = Dict.empty;
     const secondDictEmpty = Dict.empty;

@@ -423,6 +433,12 @@ describe("primitives", function () {

     dict.setIfName("k", 1234);
     expect(dict.has("k")).toBeFalse();

+    dict.setIfDict("l", new Dict());
+    expect(dict.get("l")).toEqual(new Dict());
+
+    dict.setIfDict("m", "not a dict");
+    expect(dict.has("m")).toBeFalse();
   });
 });
@@ -170,8 +170,8 @@ describe("Writer", function () {

     const expected =
       "<< /A /B /B 123 456 R /C 789 /D (hello world) " +
-      "/E (\\(hello\\\\world\\)) /F [1.23 4.5 6] " +
-      "/G << /H 123 /I << /Length 8>> stream\n" +
+      "/E (\\(hello\\\\world\\)) /F [1.23001 4.50001 6] " +
+      "/G << /H 123.00001 /I << /Length 8>> stream\n" +
       "a stream\n" +
       "endstream>> /J true /K false " +
       "/NullArr [null 10] /NullVal null>>";