From 0ded85e9b3f7cf642ceda73fd2a449c46a64b1a3 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 7 May 2025 13:41:29 +0200 Subject: [PATCH 1/4] Add a `Page` helper method to create a `PartialEvaluator`-instance Currently we repeat the same identical code five times in the `Page`-class when creating a `PartialEvaluator`-instance, which given the number of parameters it needs seems like unnecessary duplication. --- src/core/document.js | 87 +++++++++++--------------------------------- 1 file changed, 21 insertions(+), 66 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 0eb0f96d7..724b6b91f 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -125,6 +125,22 @@ class Page { }; } + #createPartialEvaluator(handler) { + return new PartialEvaluator({ + xref: this.xref, + handler, + pageIndex: this.pageIndex, + idFactory: this._localIdFactory, + fontCache: this.fontCache, + builtInCMapCache: this.builtInCMapCache, + standardFontDataCache: this.standardFontDataCache, + globalColorSpaceCache: this.globalColorSpaceCache, + globalImageCache: this.globalImageCache, + systemFontCache: this.systemFontCache, + options: this.evaluatorOptions, + }); + } + /** * @private */ @@ -322,20 +338,7 @@ class Page { if (this.xfaFactory) { throw new Error("XFA: Cannot save new annotations."); } - - const partialEvaluator = new PartialEvaluator({ - xref: this.xref, - handler, - pageIndex: this.pageIndex, - idFactory: this._localIdFactory, - fontCache: this.fontCache, - builtInCMapCache: this.builtInCMapCache, - standardFontDataCache: this.standardFontDataCache, - globalColorSpaceCache: this.globalColorSpaceCache, - globalImageCache: this.globalImageCache, - systemFontCache: this.systemFontCache, - options: this.evaluatorOptions, - }); + const partialEvaluator = this.#createPartialEvaluator(handler); const deletedAnnotations = new RefSetCache(); const existingAnnotations = new RefSet(); @@ -378,19 +381,7 @@ class Page { } async save(handler, task, annotationStorage, changes) { - const partialEvaluator = new PartialEvaluator({ - xref: this.xref, - handler, - pageIndex: this.pageIndex, - idFactory: this._localIdFactory, - fontCache: this.fontCache, - builtInCMapCache: this.builtInCMapCache, - standardFontDataCache: this.standardFontDataCache, - globalColorSpaceCache: this.globalColorSpaceCache, - globalImageCache: this.globalImageCache, - systemFontCache: this.systemFontCache, - options: this.evaluatorOptions, - }); + const partialEvaluator = this.#createPartialEvaluator(handler); // Fetch the page's annotations and save the content // in case of interactive form fields. @@ -450,19 +441,7 @@ class Page { const contentStreamPromise = this.getContentStream(); const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST); - const partialEvaluator = new PartialEvaluator({ - xref: this.xref, - handler, - pageIndex: this.pageIndex, - idFactory: this._localIdFactory, - fontCache: this.fontCache, - builtInCMapCache: this.builtInCMapCache, - standardFontDataCache: this.standardFontDataCache, - globalColorSpaceCache: this.globalColorSpaceCache, - globalImageCache: this.globalImageCache, - systemFontCache: this.systemFontCache, - options: this.evaluatorOptions, - }); + const partialEvaluator = this.#createPartialEvaluator(handler); const newAnnotsByPage = !this.xfaFactory ? getNewAnnotationsMap(annotationStorage) @@ -670,19 +649,7 @@ class Page { RESOURCES_KEYS_TEXT_CONTENT ); - const partialEvaluator = new PartialEvaluator({ - xref: this.xref, - handler, - pageIndex: this.pageIndex, - idFactory: this._localIdFactory, - fontCache: this.fontCache, - builtInCMapCache: this.builtInCMapCache, - standardFontDataCache: this.standardFontDataCache, - globalColorSpaceCache: this.globalColorSpaceCache, - globalImageCache: this.globalImageCache, - systemFontCache: this.systemFontCache, - options: this.evaluatorOptions, - }); + const partialEvaluator = this.#createPartialEvaluator(handler); return partialEvaluator.getTextContent({ stream: contentStream, @@ -751,19 +718,7 @@ class Page { } if (annotation.hasTextContent && isVisible) { - partialEvaluator ||= new PartialEvaluator({ - xref: this.xref, - handler, - pageIndex: this.pageIndex, - idFactory: this._localIdFactory, - fontCache: this.fontCache, - builtInCMapCache: this.builtInCMapCache, - standardFontDataCache: this.standardFontDataCache, - globalColorSpaceCache: this.globalColorSpaceCache, - globalImageCache: this.globalImageCache, - systemFontCache: this.systemFontCache, - options: this.evaluatorOptions, - }); + partialEvaluator ??= this.#createPartialEvaluator(handler); textContentPromises.push( annotation From 39803a9f25c61a5c8c4e05e1e2a1bb7ad2489785 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 7 May 2025 13:41:36 +0200 Subject: [PATCH 2/4] Replace a number of semi-private methods with actual private ones in `src/core/document.js` There's a few remaining cases that are used with either cached getters or `BasePdfManager.prototype.ensure`-methods, and those cannot be converted. --- src/core/document.js | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 724b6b91f..a26cf0f63 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -141,10 +141,7 @@ class Page { }); } - /** - * @private - */ - _getInheritableProperty(key, getArray = false) { + #getInheritableProperty(key, getArray = false) { const value = getInheritableProperty({ dict: this.pageDict, key, @@ -168,7 +165,7 @@ class Page { // For robustness: The spec states that a \Resources entry has to be // present, but can be empty. Some documents still omit it; in this case // we return an empty dictionary. - const resources = this._getInheritableProperty("Resources"); + const resources = this.#getInheritableProperty("Resources"); return shadow( this, @@ -177,12 +174,12 @@ class Page { ); } - _getBoundingBox(name) { + #getBoundingBox(name) { if (this.xfaData) { return this.xfaData.bbox; } const box = lookupNormalRect( - this._getInheritableProperty(name, /* getArray = */ true), + this.#getInheritableProperty(name, /* getArray = */ true), null ); @@ -200,7 +197,7 @@ class Page { return shadow( this, "mediaBox", - this._getBoundingBox("MediaBox") || LETTER_SIZE_MEDIABOX + this.#getBoundingBox("MediaBox") || LETTER_SIZE_MEDIABOX ); } @@ -209,7 +206,7 @@ class Page { return shadow( this, "cropBox", - this._getBoundingBox("CropBox") || this.mediaBox + this.#getBoundingBox("CropBox") || this.mediaBox ); } @@ -240,7 +237,7 @@ class Page { } get rotate() { - let rotate = this._getInheritableProperty("Rotate") || 0; + let rotate = this.#getInheritableProperty("Rotate") || 0; // Normalize rotation so it's a multiple of 90 and between 0 and 270. if (rotate % 90 !== 0) { @@ -255,10 +252,7 @@ class Page { return shadow(this, "rotate", rotate); } - /** - * @private - */ - _onSubStreamError(reason, objId) { + #onSubStreamError(reason, objId) { if (this.evaluatorOptions.ignoreErrors) { warn(`getContentStream - ignoring sub-stream (${objId}): "${reason}".`); return; @@ -278,7 +272,7 @@ class Page { if (Array.isArray(content)) { return new StreamsSequenceStream( content, - this._onSubStreamError.bind(this) + this.#onSubStreamError.bind(this) ); } // Replace non-existent page content with empty content. @@ -405,7 +399,7 @@ class Page { } async loadResources(keys) { - // TODO: add async `_getInheritableProperty` and remove this. + // TODO: add async `#getInheritableProperty` and remove this. await (this.resourcesPromise ??= this.pdfManager.ensure(this, "resources")); await ObjectLoader.load(this.resources, keys, this.xref); @@ -742,7 +736,7 @@ class Page { } get annotations() { - const annots = this._getInheritableProperty("Annots"); + const annots = this.#getInheritableProperty("Annots"); return shadow(this, "annotations", Array.isArray(annots) ? annots : []); } @@ -1045,10 +1039,7 @@ class PDFDocument { return shadow(this, "numPages", num); } - /** - * @private - */ - _hasOnlyDocumentSignatures(fields, recursionDepth = 0) { + #hasOnlyDocumentSignatures(fields, recursionDepth = 0) { const RECURSION_LIMIT = 10; if (!Array.isArray(fields)) { @@ -1061,10 +1052,10 @@ class PDFDocument { } if (field.has("Kids")) { if (++recursionDepth > RECURSION_LIMIT) { - warn("_hasOnlyDocumentSignatures: maximum recursion depth reached"); + warn("#hasOnlyDocumentSignatures: maximum recursion depth reached"); return false; } - return this._hasOnlyDocumentSignatures( + return this.#hasOnlyDocumentSignatures( field.get("Kids"), recursionDepth ); @@ -1393,7 +1384,7 @@ class PDFDocument { const sigFlags = acroForm.get("SigFlags"); const hasSignatures = !!(sigFlags & 0x1); const hasOnlyDocumentSignatures = - hasSignatures && this._hasOnlyDocumentSignatures(fields); + hasSignatures && this.#hasOnlyDocumentSignatures(fields); formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures; formInfo.hasSignatures = hasSignatures; } catch (ex) { @@ -1520,7 +1511,7 @@ class PDFDocument { ]); } - async _getLinearizationPage(pageIndex) { + async #getLinearizationPage(pageIndex) { const { catalog, linearization, xref } = this; if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -1573,7 +1564,7 @@ class PDFDocument { if (xfaFactory) { promise = Promise.resolve([Dict.empty, null]); } else if (linearization?.pageFirst === pageIndex) { - promise = this._getLinearizationPage(pageIndex); + promise = this.#getLinearizationPage(pageIndex); } else { promise = catalog.getPageDict(pageIndex); } From 92b065c87e9921055382cf7cf66133faa378b995 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 7 May 2025 13:41:44 +0200 Subject: [PATCH 3/4] Replace a number of semi-private fields with actual private ones in `src/core/document.js` These are fields that can be moved out of their class constructors, and be initialized directly. --- src/core/document.js | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index a26cf0f63..588031965 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -79,6 +79,8 @@ import { XRef } from "./xref.js"; const LETTER_SIZE_MEDIABOX = [0, 0, 612, 792]; class Page { + #resourcesPromise = null; + constructor({ pdfManager, xref, @@ -108,7 +110,6 @@ class Page { this.systemFontCache = systemFontCache; this.nonBlendModesSet = nonBlendModesSet; this.evaluatorOptions = pdfManager.evaluatorOptions; - this.resourcesPromise = null; this.xfaFactory = xfaFactory; const idCounters = { @@ -400,7 +401,10 @@ class Page { async loadResources(keys) { // TODO: add async `#getInheritableProperty` and remove this. - await (this.resourcesPromise ??= this.pdfManager.ensure(this, "resources")); + await (this.#resourcesPromise ??= this.pdfManager.ensure( + this, + "resources" + )); await ObjectLoader.load(this.resources, keys, this.xref); } @@ -876,6 +880,10 @@ function find(stream, signature, limit = 1024, backwards = false) { * The `PDFDocument` class holds all the (worker-thread) data of the PDF file. */ class PDFDocument { + #pagePromises = new Map(); + + #version = null; + constructor(pdfManager, stream) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -892,8 +900,6 @@ class PDFDocument { this.pdfManager = pdfManager; this.stream = stream; this.xref = new XRef(stream, pdfManager); - this._pagePromises = new Map(); - this._version = null; const idCounters = { font: 0, @@ -1014,7 +1020,7 @@ class PDFDocument { } if (PDF_VERSION_REGEXP.test(version)) { - this._version = version; + this.#version = version; } else { warn(`Invalid PDF header version: ${version}`); } @@ -1347,7 +1353,7 @@ class PDFDocument { * the catalog, if present, should overwrite the version from the header. */ get version() { - return this.catalog.version || this._version; + return this.catalog.version || this.#version; } get formInfo() { @@ -1554,7 +1560,7 @@ class PDFDocument { } getPage(pageIndex) { - const cachedPromise = this._pagePromises.get(pageIndex); + const cachedPromise = this.#pagePromises.get(pageIndex); if (cachedPromise) { return cachedPromise; } @@ -1588,7 +1594,7 @@ class PDFDocument { }) ); - this._pagePromises.set(pageIndex, promise); + this.#pagePromises.set(pageIndex, promise); return promise; } @@ -1603,7 +1609,7 @@ class PDFDocument { // Clear out the various caches to ensure that we haven't stored any // inconsistent and/or incorrect state, since that could easily break // subsequent `this.getPage` calls. - this._pagePromises.delete(0); + this.#pagePromises.delete(0); await this.cleanup(); throw new XRefParseException(); @@ -1642,7 +1648,7 @@ class PDFDocument { // Clear out the various caches to ensure that we haven't stored any // inconsistent and/or incorrect state, since that could easily break // subsequent `this.getPage` calls. - this._pagePromises.delete(numPages - 1); + this.#pagePromises.delete(numPages - 1); await this.cleanup(); if (reason instanceof XRefEntryException && !recoveryMode) { @@ -1689,7 +1695,7 @@ class PDFDocument { ); } - this._pagePromises.set(pageIndex, promise); + this.#pagePromises.set(pageIndex, promise); } catalog.setActualNumPages(pagesTree.size); } From 36fafbc05cd5f62e15ac674b43020ee16b4608aa Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 7 May 2025 13:41:50 +0200 Subject: [PATCH 4/4] Use object destructuring a bit more in the `src/core/document.js` file --- src/core/document.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 588031965..508a42d22 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1363,7 +1363,7 @@ class PDFDocument { hasXfa: false, hasSignatures: false, }; - const acroForm = this.catalog.acroForm; + const { acroForm } = this.catalog; if (!acroForm) { return shadow(this, "formInfo", formInfo); } @@ -1403,22 +1403,22 @@ class PDFDocument { } get documentInfo() { + const { catalog, formInfo, xref } = this; + const docInfo = { PDFFormatVersion: this.version, - Language: this.catalog.lang, - EncryptFilterName: this.xref.encrypt - ? this.xref.encrypt.filterName - : null, + Language: catalog.lang, + EncryptFilterName: xref.encrypt?.filterName ?? null, IsLinearized: !!this.linearization, - IsAcroFormPresent: this.formInfo.hasAcroForm, - IsXFAPresent: this.formInfo.hasXfa, - IsCollectionPresent: !!this.catalog.collection, - IsSignaturesPresent: this.formInfo.hasSignatures, + IsAcroFormPresent: formInfo.hasAcroForm, + IsXFAPresent: formInfo.hasXfa, + IsCollectionPresent: !!catalog.collection, + IsSignaturesPresent: formInfo.hasSignatures, }; let infoDict; try { - infoDict = this.xref.trailer.get("Info"); + infoDict = xref.trailer.get("Info"); } catch (err) { if (err instanceof MissingDataException) { throw err;